Index: projects/clang700-import/cddl/usr.sbin/dwatch/libexec/Makefile
===================================================================
--- projects/clang700-import/cddl/usr.sbin/dwatch/libexec/Makefile	(revision 337615)
+++ projects/clang700-import/cddl/usr.sbin/dwatch/libexec/Makefile	(revision 337616)
@@ -1,90 +1,91 @@
 # $FreeBSD$
 
 FILESDIR=	${LIBEXECDIR}/dwatch
 FILES=		chmod \
 		errno \
 		io \
 		ip \
 		kill \
 		nanosleep \
 		open \
 		proc \
 		rw \
 		sched \
 		sendrecv \
+		systop \
 		tcp \
 		udp \
 		udplite \
 		vop_create \
 		vop_readdir \
 		vop_rename \
 		vop_symlink
 LINKS=	${LIBEXECDIR}/dwatch/chmod ${LIBEXECDIR}/dwatch/fchmodat
 LINKS+=	${LIBEXECDIR}/dwatch/chmod ${LIBEXECDIR}/dwatch/lchmod
 LINKS+=	${LIBEXECDIR}/dwatch/io ${LIBEXECDIR}/dwatch/io-done
 LINKS+=	${LIBEXECDIR}/dwatch/io ${LIBEXECDIR}/dwatch/io-start
 LINKS+=	${LIBEXECDIR}/dwatch/ip ${LIBEXECDIR}/dwatch/ip-receive
 LINKS+=	${LIBEXECDIR}/dwatch/ip ${LIBEXECDIR}/dwatch/ip-send
 LINKS+=	${LIBEXECDIR}/dwatch/open ${LIBEXECDIR}/dwatch/openat
 LINKS+=	${LIBEXECDIR}/dwatch/proc ${LIBEXECDIR}/dwatch/proc-create
 LINKS+=	${LIBEXECDIR}/dwatch/proc ${LIBEXECDIR}/dwatch/proc-exec
 LINKS+=	${LIBEXECDIR}/dwatch/proc ${LIBEXECDIR}/dwatch/proc-exec-failure
 LINKS+=	${LIBEXECDIR}/dwatch/proc ${LIBEXECDIR}/dwatch/proc-exec-success
 LINKS+=	${LIBEXECDIR}/dwatch/proc ${LIBEXECDIR}/dwatch/proc-exit
 LINKS+=	${LIBEXECDIR}/dwatch/proc ${LIBEXECDIR}/dwatch/proc-signal
 LINKS+=	${LIBEXECDIR}/dwatch/proc ${LIBEXECDIR}/dwatch/proc-signal-clear
 LINKS+=	${LIBEXECDIR}/dwatch/proc ${LIBEXECDIR}/dwatch/proc-signal-discard
 LINKS+=	${LIBEXECDIR}/dwatch/proc ${LIBEXECDIR}/dwatch/proc-signal-send
 LINKS+=	${LIBEXECDIR}/dwatch/proc ${LIBEXECDIR}/dwatch/proc-status
 LINKS+=	${LIBEXECDIR}/dwatch/rw ${LIBEXECDIR}/dwatch/read
 LINKS+=	${LIBEXECDIR}/dwatch/rw ${LIBEXECDIR}/dwatch/write
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-change-pri
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-cpu
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-dequeue
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-enqueue
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-exec
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-lend-pri
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-load-change
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-off-cpu
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-on-cpu
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-preempt
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-pri
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-queue
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-remain-cpu
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-sleep
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-surrender
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-tick
 LINKS+=	${LIBEXECDIR}/dwatch/sched ${LIBEXECDIR}/dwatch/sched-wakeup
 LINKS+=	${LIBEXECDIR}/dwatch/sendrecv ${LIBEXECDIR}/dwatch/recv
 LINKS+=	${LIBEXECDIR}/dwatch/sendrecv ${LIBEXECDIR}/dwatch/recvfrom
 LINKS+=	${LIBEXECDIR}/dwatch/sendrecv ${LIBEXECDIR}/dwatch/recvmsg
 LINKS+=	${LIBEXECDIR}/dwatch/sendrecv ${LIBEXECDIR}/dwatch/send
 LINKS+=	${LIBEXECDIR}/dwatch/sendrecv ${LIBEXECDIR}/dwatch/sendmsg
 LINKS+=	${LIBEXECDIR}/dwatch/sendrecv ${LIBEXECDIR}/dwatch/sendto
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-accept
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-accept-established
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-accept-refused
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-connect
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-connect-established
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-connect-refused
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-connect-request
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-established
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-init
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-io
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-receive
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-refused
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-send
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-state-change
 LINKS+=	${LIBEXECDIR}/dwatch/tcp ${LIBEXECDIR}/dwatch/tcp-status
 LINKS+=	${LIBEXECDIR}/dwatch/udp ${LIBEXECDIR}/dwatch/udp-receive
 LINKS+=	${LIBEXECDIR}/dwatch/udp ${LIBEXECDIR}/dwatch/udp-send
 LINKS+=	${LIBEXECDIR}/dwatch/udplite ${LIBEXECDIR}/dwatch/udplite-receive
 LINKS+=	${LIBEXECDIR}/dwatch/udplite ${LIBEXECDIR}/dwatch/udplite-send
 LINKS+=	${LIBEXECDIR}/dwatch/vop_create ${LIBEXECDIR}/dwatch/vop_lookup
 LINKS+=	${LIBEXECDIR}/dwatch/vop_create ${LIBEXECDIR}/dwatch/vop_mkdir
 LINKS+=	${LIBEXECDIR}/dwatch/vop_create ${LIBEXECDIR}/dwatch/vop_mknod
 LINKS+=	${LIBEXECDIR}/dwatch/vop_create ${LIBEXECDIR}/dwatch/vop_remove
 LINKS+=	${LIBEXECDIR}/dwatch/vop_create ${LIBEXECDIR}/dwatch/vop_rmdir
 
 .include <bsd.prog.mk>
Index: projects/clang700-import/cddl/usr.sbin/dwatch/libexec/systop
===================================================================
--- projects/clang700-import/cddl/usr.sbin/dwatch/libexec/systop	(nonexistent)
+++ projects/clang700-import/cddl/usr.sbin/dwatch/libexec/systop	(revision 337616)
@@ -0,0 +1,84 @@
+# -*- tab-width: 4 -*- ;; Emacs
+# vi: set filetype=sh tabstop=8 shiftwidth=8 noexpandtab :: Vi/ViM
+############################################################ IDENT(1)
+#
+# $Title: dwatch(8) profile for top-like syscall $
+# $Copyright: 2014-2018 Devin Teske. All rights reserved. $
+# $FreeBSD$
+#
+############################################################ DESCRIPTION
+#
+# Every 3 seconds update the screen with syscall consumers.
+#
+############################################################ PRAGMAS
+
+# Optional: You can override the default pragmas (shown below)
+
+DTRACE_PRAGMA="
+	option quiet
+	option aggsortrev
+" # END-QUOTE
+
+############################################################ PROBE
+
+: ${PROBE:=profile:::tick-3s}
+
+############################################################ ACTIONS
+
+size=$( stty size 2> /dev/null )
+rows="${size%% *}"
+cols="${size#* }"
+
+exec 9<<EOF
+$PROBE /* probe ID $ID */
+{${TRACE:+
+	printf("<$ID>");}
+	this->vp = (struct vnode *)arg0;
+	this->ncp = this->vp != NULL ?
+		this->vp->v_cache_dst.tqh_first : 0;
+	this->mount = this->vp != NULL ?
+		this->vp->v_mount : NULL; /* ptr to vfs we are in */
+	this->fi_fs = this->mount != NULL ?
+		stringof(this->mount->mnt_stat.f_fstypename) : "";
+	this->fi_mount = this->mount != NULL ?
+		stringof(this->mount->mnt_stat.f_mntonname) : "";
+	this->d_name = args[0]->v_cache_dd != NULL ?
+	    stringof(args[0]->v_cache_dd->nc_name) : "";
+	$( awk -v MAX_DEPTH=$MAX_DEPTH '
+	{ sub(/^\\\t/, "\t") }
+	{ buf = buf "\t" $0 "\n" }
+	END {
+		sub(/\n$/, "", buf)
+		$0 = buf
+		sub(/^[[:space:]]*/, "")
+		for (DEPTH = 1; DEPTH <= MAX_DEPTH + 1; DEPTH++) {
+			gsub(/DEPTH/, DEPTH)
+			print
+			$0 = buf
+		}
+	}
+	' <<-EOFDEPTH
+	this->nameDEPTH = "";
+	EOFDEPTH
+	)
+}
+
+$PROBE /this->vp == 0 || this->fi_fs == 0 ||
+	this->fi_fs == "devfs" || this->fi_fs == ""/
+/* probe ID $(( $ID + 1 )) */
+{${TRACE:+
+	printf("<$(( $ID + 1 ))>");}
+	this->ncp = 0;
+}
+
+/*********************************************************/
+
+$PROBE /this->ncp/ /* probe ID $(( $ID + 2 )) (depth 1) */
+{${TRACE:+
+	printf("<$(( $ID + 2 ))>");}
+	this->dvp = this->ncp->nc_dvp != NULL ?
+		this->ncp->nc_dvp->v_cache_dst.tqh_first : 0;
+	this->name1 = this->dvp != 0 ? (
+		this->dvp->nc_name != 0 ? stringof(this->dvp->nc_name) : ""
+	) : "";
+}
+
+$PROBE /this->name1 == 0 || this->fi_fs == 0 ||
+	this->fi_fs == "devfs" || this->fi_fs == "" ||
+	this->name1 == "/" || this->name1 == ""/
+/* probe ID $(( $ID + 3 )) */
+{${TRACE:+
+	printf("<$(( $ID + 3 ))>");}
+	this->dvp = 0;
+}
+
+/*********************************************************/
+
+/*
+ * BEGIN Pathname-depth iterators
+ */
+
+$( awk -v ID=$(( $ID + 4 )) -v MAX_DEPTH=$MAX_DEPTH '
+	{ buf = buf $0 "\n" }
+	END {
+		sub(/\n$/, "", buf)
+		for (DEPTH = 2; DEPTH <= MAX_DEPTH; DEPTH++) {
+			$0 = buf
+			gsub(/DEPTH/, DEPTH)
+			gsub(/IDNUM/, ID++)
+			print
+		}
+	}
+' <<-EOFDEPTH
+$PROBE /this->dvp/ /* probe ID IDNUM (depth DEPTH) */
+{${TRACE:+
+	printf("<IDNUM>");}
+	this->dvp = this->dvp->nc_dvp != NULL ?
+		this->dvp->nc_dvp->v_cache_dst.tqh_first : 0;
+	this->nameDEPTH = this->dvp != 0 ? (
+		this->dvp->nc_name != 0 ? stringof(this->dvp->nc_name) : ""
+	) : "";
+}
+EOFDEPTH
+)
+
+$PROBE /this->dvp/ /* probe ID $(( $ID + $MAX_DEPTH + 3 )) */
+{${TRACE:+
+	printf("<$(( $ID + $MAX_DEPTH + 3 ))>");}
+	this->dvp = this->dvp->nc_dvp != NULL ?
+		this->dvp->nc_dvp->v_cache_dst.tqh_first : 0;
+	this->name$(( $MAX_DEPTH + 1 )) = this->dvp != 0 ? (
+		this->dvp->nc_dvp != NULL ? "..." : ""
+	) : "";
+}
+
+/*
+ * END Pathname-depth iterators
+ */
+
+/*********************************************************/
+
+$PROBE /this->fi_mount != 0/ /* probe ID $(( $ID + $MAX_DEPTH + 4 )) */
+{${TRACE:+
+	printf("<$(( $ID + $MAX_DEPTH + 4 ))>"); }
+	/*
+	 * Join full path
+	 * NB: Up-to but not including the parent directory (joined below)
+	 */
+	this->path = this->fi_mount;
+	this->path = strjoin(this->path, this->fi_mount != 0 ? (
+		this->fi_mount == "/" ? "" : "/"
+	) : "/");
+	$( awk -v MAX_DEPTH=$MAX_DEPTH '
+	{ sub(/^\\\t/, "\t") }
+	{ buf = buf "\t" $0 "\n" }
+	END {
+		sub(/\n$/, "", buf)
+		$0 = buf
+		sub(/^[[:space:]]*/, "")
+		for (N = MAX_DEPTH + 1; N > 0; N--) {
+			gsub(/N/, N)
+			print
+			$0 = buf
+		}
+	}
+	' <<-EOFDEPTH
+	this->path = strjoin(this->path, \
+		strjoin(this->nameN, this->nameN != "" ? "/" : ""));
+	EOFDEPTH
+	)
+
+	/* Join the parent directory name */
+	this->path = strjoin(this->path,
+		strjoin(this->name = (this->d_name != 0 ? this->d_name : ""),
+			this->name != "" ? "/" : ""));
+}
+EOF
+ACTIONS=$( cat <&9 )
+ID=$(( $ID + $MAX_DEPTH + 5 ))
+
+############################################################ EVENT ACTION
+
+EVENT_TEST="this->fi_mount != 0"
+
+############################################################ EVENT DETAILS
+
+if [ ! "$CUSTOM_DETAILS" ]; then
+	exec 9<path);
+EOF
+	EVENT_DETAILS=$( cat <&9 )
+fi
+
+################################################################################
+# END
+################################################################################
Index: projects/clang700-import/cddl
===================================================================
--- projects/clang700-import/cddl	(revision 337615)
+++ projects/clang700-import/cddl	(revision 337616)

Property changes on: projects/clang700-import/cddl
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/cddl:r337586-337615
Index: projects/clang700-import/contrib/pf/pflogd/pflogd.8
===================================================================
--- projects/clang700-import/contrib/pf/pflogd/pflogd.8	(revision 337615)
+++ projects/clang700-import/contrib/pf/pflogd/pflogd.8	(revision 337616)
@@ -1,237 +1,237 @@
 .\" $OpenBSD: pflogd.8,v 1.37 2008/10/22 08:16:49 henning Exp $
 .\"
 .\" Copyright (c) 2001 Can Erkin Acar.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. The name of the author may not be used to endorse or promote products
 .\"    derived from this software without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 .\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 22 2008
+.Dd August 11, 2018
 .Dt PFLOGD 8
 .Os
 .Sh NAME
 .Nm pflogd
 .Nd packet filter logging daemon
 .Sh SYNOPSIS
 .Nm pflogd
 .Bk -words
-.Op Fl Dx
+.Op Fl \&Dx
 .Op Fl d Ar delay
 .Op Fl f Ar filename
 .Op Fl i Ar interface
 .Op Fl s Ar snaplen
 .Op Ar expression
 .Ek
 .Sh DESCRIPTION
 .Nm
 is a background daemon which reads packets logged by
 .Xr pf 4
 to a
 .Xr pflog 4
 interface, normally
 .Pa pflog0 ,
 and writes the packets to a logfile (normally
 .Pa /var/log/pflog )
 in
 .Xr tcpdump 1
 binary format.
 These logs can be reviewed later using the
 .Fl r
 option of
 .Xr tcpdump 1 ,
 hopefully offline in case there are bugs in the packet parsing code of
 .Xr tcpdump 1 .
 .Pp
 .Nm
 closes and then re-opens the log file when it receives
 .Dv SIGHUP ,
 permitting
 .Xr newsyslog 8
 to rotate logfiles automatically.
 .Dv SIGALRM
 causes
 .Nm
 to flush the current logfile buffers to the disk, thus making the most
 recent logs available.
 The buffers are also flushed every
 .Ar delay
 seconds.
 .Pp
 If the log file contains data after a restart or a
 .Dv SIGHUP ,
 new logs are appended to the existing file.
 If the existing log file was created with a different snaplen,
 .Nm
 temporarily uses the old snaplen to keep the log file consistent.
 .Pp
 .Nm
 tries to preserve the integrity of the log file against I/O errors.
 Furthermore, integrity of an existing log file is verified before
 appending.
 If there is an invalid log file or an I/O error, the log file is moved
 out of the way and a new one is created.
 If a new file cannot be created, logging is suspended until a
 .Dv SIGHUP
 or a
 .Dv SIGALRM
 is received.
 .Pp
 .Nm
 will also log the pcap statistics for the
 .Xr pflog 4
 interface to syslog when a
 .Dv SIGUSR1
 is received.
 .Pp
 The options are as follows:
 .Bl -tag -width Ds
 .It Fl D
 Debugging mode.
 .Nm
 does not disassociate from the controlling terminal.
 .It Fl d Ar delay
 Time in seconds to delay between automatic flushes of the file.
 This may be specified with a value between 5 and 3600 seconds.
 If not specified, the default is 60 seconds.
 .It Fl f Ar filename
 Log output filename.
 Default is
 .Pa /var/log/pflog .
 .It Fl i Ar interface
 Specifies the
 .Xr pflog 4
 interface to use.
 By default,
 .Nm
 will use
 .Ar pflog0 .
 .It Fl p Ar pidfile
 Writes a file containing the process ID of the program to
 .Pa /var/run .
 The file name has the form
 .Ao Ar pidfile Ac Ns .pid .
 The default is
 .Ar pflogd .
 .It Fl s Ar snaplen
 Analyze at most the first
 .Ar snaplen
 bytes of data from each packet rather than the default of 116.
 The default of 116 is adequate for IP, ICMP, TCP, and UDP headers but
 may truncate protocol information for other protocols.
 Other file parsers may desire a higher snaplen.
 .It Fl x
 Check the integrity of an existing log file, and return.
 .It Ar expression
 Selects which packets will be dumped, using the regular language of
 .Xr tcpdump 1 .
 .El
 .Sh FILES
 .Bl -tag -width /var/run/pflogd.pid -compact
 .It Pa /var/run/pflogd.pid
 Process ID of the currently running
 .Nm .
 .It Pa /var/log/pflog
 Default log file.
 .El
 .Sh EXAMPLES
 Log specific tcp packets to a different log file with a large snaplen
 (useful with a log-all rule to dump complete sessions):
 .Bd -literal -offset indent
 # pflogd -s 1600 -f suspicious.log port 80 and host evilhost
 .Ed
 .Pp
 Log from another
 .Xr pflog 4
 interface, excluding specific packets:
 .Bd -literal -offset indent
 # pflogd -i pflog3 -f network3.log "not (tcp and port 23)"
 .Ed
 .Pp
 Display binary logs:
 .Bd -literal -offset indent
 # tcpdump -n -e -ttt -r /var/log/pflog
 .Ed
 .Pp
 Display the logs in real time (this does not interfere with the
 operation of
 .Nm ) :
 .Bd -literal -offset indent
 # tcpdump -n -e -ttt -i pflog0
 .Ed
 .Pp
 Tcpdump has been extended to be able to filter on the pfloghdr
 structure defined in
 .Aq Ar net/if_pflog.h .
 Tcpdump can restrict the output to packets logged on a specified
 interface, a rule number, a reason, a direction, an IP family or an
 action.
 .Pp
 .Bl -tag -width "ruleset authpf " -compact
 .It ip
 Address family equals IPv4.
 .It ip6
 Address family equals IPv6.
 .It ifname kue0
 Interface name equals "kue0".
 .It on kue0
 Interface name equals "kue0".
 .It ruleset authpf
 Ruleset name equals "authpf".
 .It rulenum 10
 Rule number equals 10.
 .It reason match
 Reason equals match.
 Also accepts "bad-offset", "fragment", "bad-timestamp", "short",
 "normalize", "memory", "congestion", "ip-option", "proto-cksum",
 "state-mismatch", "state-insert", "state-limit", "src-limit",
 and "synproxy".
 .It action pass
 Action equals pass.
 Also accepts "block".
 .It inbound
 The direction was inbound.
 .It outbound
 The direction was outbound.
 .El
 .Pp
 Display the logs in real time of inbound packets that were blocked on
 the wi0 interface:
 .Bd -literal -offset indent
 # tcpdump -n -e -ttt -i pflog0 inbound and action block and on wi0
 .Ed
 .Sh SEE ALSO
 .Xr tcpdump 1 ,
 .Xr pcap 3 ,
 .Xr pf 4 ,
 .Xr pflog 4 ,
 .Xr pf.conf 5 ,
 .Xr newsyslog 8
 .Sh HISTORY
 The
 .Nm
 command appeared in
 .Ox 3.0 .
 .Sh AUTHORS
 .Nm
 was written by
 .An Can Erkin Acar Aq canacar@openbsd.org .
Index: projects/clang700-import/contrib/pf
===================================================================
--- projects/clang700-import/contrib/pf	(revision 337615)
+++ projects/clang700-import/contrib/pf	(revision 337616)

Property changes on: projects/clang700-import/contrib/pf
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/pf:r336870-337615
Index: projects/clang700-import/sbin/ifconfig/ifieee80211.c
===================================================================
--- projects/clang700-import/sbin/ifconfig/ifieee80211.c	(revision 337615)
+++ projects/clang700-import/sbin/ifconfig/ifieee80211.c	(revision 337616)
@@ -1,5782 +1,5781 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright 2001 The Aerospace Corporation.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of The Aerospace Corporation may not be used to endorse or
 *    promote products derived from this software.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AEROSPACE CORPORATION ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AEROSPACE CORPORATION BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*-
 * Copyright (c) 1997, 1998, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC.
AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* NB: for offsetof */ #include #include #include "ifconfig.h" #include #include #ifndef IEEE80211_FIXED_RATE_NONE #define IEEE80211_FIXED_RATE_NONE 0xff #endif /* XXX need these publicly defined or similar */ #ifndef IEEE80211_NODE_AUTH #define IEEE80211_NODE_AUTH 0x000001 /* authorized for data */ #define IEEE80211_NODE_QOS 0x000002 /* QoS enabled */ #define IEEE80211_NODE_ERP 0x000004 /* ERP enabled */ #define IEEE80211_NODE_PWR_MGT 0x000010 /* power save mode enabled */ #define IEEE80211_NODE_AREF 0x000020 /* authentication ref held */ #define IEEE80211_NODE_HT 0x000040 /* HT enabled */ #define IEEE80211_NODE_HTCOMPAT 0x000080 /* HT setup w/ vendor OUI's */ #define IEEE80211_NODE_WPS 0x000100 /* WPS association */ #define IEEE80211_NODE_TSN 0x000200 /* TSN association */ #define IEEE80211_NODE_AMPDU_RX 0x000400 /* AMPDU rx enabled */ #define IEEE80211_NODE_AMPDU_TX 0x000800 /* AMPDU tx enabled */ #define IEEE80211_NODE_MIMO_PS 0x001000 /* MIMO power save enabled */ #define IEEE80211_NODE_MIMO_RTS 0x002000 /* send RTS in MIMO PS */ #define IEEE80211_NODE_RIFS 0x004000 /* RIFS enabled */ #define IEEE80211_NODE_SGI20 0x008000 /* Short GI in HT20 enabled */ #define IEEE80211_NODE_SGI40 0x010000 /* Short GI in HT40 enabled */ #define IEEE80211_NODE_ASSOCID 0x020000 /* xmit requires associd */ #define IEEE80211_NODE_AMSDU_RX 0x040000 /* AMSDU rx enabled */ #define IEEE80211_NODE_AMSDU_TX 0x080000 /* AMSDU tx enabled */ #define IEEE80211_NODE_VHT 0x100000 /* VHT enabled */ #endif #define MAXCHAN 1536 /* max 1.5K channels */ #define MAXCOL 78 static int col; static char spacer; static void LINE_INIT(char c); static void LINE_BREAK(void); static void LINE_CHECK(const char *fmt, ...); static const char *modename[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = "auto", [IEEE80211_MODE_11A] = "11a", [IEEE80211_MODE_11B] = "11b", [IEEE80211_MODE_11G] = "11g", [IEEE80211_MODE_FH] = "fh", [IEEE80211_MODE_TURBO_A] = "turboA", [IEEE80211_MODE_TURBO_G] = "turboG", [IEEE80211_MODE_STURBO_A] = "sturbo", [IEEE80211_MODE_11NA] = "11na", [IEEE80211_MODE_11NG] = "11ng", [IEEE80211_MODE_HALF] = "half", [IEEE80211_MODE_QUARTER] = "quarter", [IEEE80211_MODE_VHT_2GHZ] = "11acg", [IEEE80211_MODE_VHT_5GHZ] = "11ac", }; static void set80211(int s, int type, int val, int len, void *data); static int get80211(int s, int type, void *data, int len); static int get80211len(int s, int type, void *data, int len, int *plen); static int get80211val(int s, int type, int *val); static const char *get_string(const char *val, const char *sep, u_int8_t *buf, int *lenp); 
static void print_string(const u_int8_t *buf, int len); static void print_regdomain(const struct ieee80211_regdomain *, int); static void print_channels(int, const struct ieee80211req_chaninfo *, int allchans, int verbose); static void regdomain_makechannels(struct ieee80211_regdomain_req *, const struct ieee80211_devcaps_req *); static const char *mesh_linkstate_string(uint8_t state); static struct ieee80211req_chaninfo *chaninfo; static struct ieee80211_regdomain regdomain; static int gotregdomain = 0; static struct ieee80211_roamparams_req roamparams; static int gotroam = 0; static struct ieee80211_txparams_req txparams; static int gottxparams = 0; static struct ieee80211_channel curchan; static int gotcurchan = 0; static struct ifmediareq *ifmr; static int htconf = 0; static int gothtconf = 0; static void gethtconf(int s) { if (gothtconf) return; if (get80211val(s, IEEE80211_IOC_HTCONF, &htconf) < 0) warn("unable to get HT configuration information"); gothtconf = 1; } /* VHT */ static int vhtconf = 0; static int gotvhtconf = 0; static void getvhtconf(int s) { if (gotvhtconf) return; if (get80211val(s, IEEE80211_IOC_VHTCONF, &vhtconf) < 0) warn("unable to get VHT configuration information"); gotvhtconf = 1; } /* * Collect channel info from the kernel. We use this (mostly) * to handle mapping between frequency and IEEE channel number. */ static void getchaninfo(int s) { if (chaninfo != NULL) return; chaninfo = malloc(IEEE80211_CHANINFO_SIZE(MAXCHAN)); if (chaninfo == NULL) errx(1, "no space for channel list"); if (get80211(s, IEEE80211_IOC_CHANINFO, chaninfo, IEEE80211_CHANINFO_SIZE(MAXCHAN)) < 0) err(1, "unable to get channel information"); ifmr = ifmedia_getstate(s); gethtconf(s); getvhtconf(s); } static struct regdata * getregdata(void) { static struct regdata *rdp = NULL; if (rdp == NULL) { rdp = lib80211_alloc_regdata(); if (rdp == NULL) errx(-1, "missing or corrupted regdomain database"); } return rdp; } /* * Given the channel at index i with attributes from, * check if there is a channel with attributes to in * the channel table. With suitable attributes this * allows the caller to look for promotion; e.g. from * 11b > 11g. */ static int canpromote(int i, int from, int to) { const struct ieee80211_channel *fc = &chaninfo->ic_chans[i]; u_int j; if ((fc->ic_flags & from) != from) return i; /* NB: quick check exploiting ordering of chans w/ same frequency */ if (i+1 < chaninfo->ic_nchans && chaninfo->ic_chans[i+1].ic_freq == fc->ic_freq && (chaninfo->ic_chans[i+1].ic_flags & to) == to) return i+1; /* brute force search in case channel list is not ordered */ for (j = 0; j < chaninfo->ic_nchans; j++) { const struct ieee80211_channel *tc = &chaninfo->ic_chans[j]; if (j != i && tc->ic_freq == fc->ic_freq && (tc->ic_flags & to) == to) return j; } return i; } /* * Handle channel promotion. When a channel is specified with * only a frequency we want to promote it to the ``best'' channel * available. The channel list has separate entries for 11b, 11g, * 11a, and 11n[ga] channels so specifying a frequency w/o any * attributes requires we upgrade, e.g. from 11b -> 11g. This * gets complicated when the channel is specified on the same * command line with a media request that constrains the available * channe list (e.g. mode 11a); we want to honor that to avoid * confusing behaviour. */ /* * XXX VHT */ static int promote(int i) { /* * Query the current mode of the interface in case it's * constrained (e.g. to 11a). 
We must do this carefully * as there may be a pending ifmedia request in which case * asking the kernel will give us the wrong answer. This * is an unfortunate side-effect of the way ifconfig is * structure for modularity (yech). * * NB: ifmr is actually setup in getchaninfo (above); we * assume it's called coincident with to this call so * we have a ``current setting''; otherwise we must pass * the socket descriptor down to here so we can make * the ifmedia_getstate call ourselves. */ int chanmode = ifmr != NULL ? IFM_MODE(ifmr->ifm_current) : IFM_AUTO; /* when ambiguous promote to ``best'' */ /* NB: we abitrarily pick HT40+ over HT40- */ if (chanmode != IFM_IEEE80211_11B) i = canpromote(i, IEEE80211_CHAN_B, IEEE80211_CHAN_G); if (chanmode != IFM_IEEE80211_11G && (htconf & 1)) { i = canpromote(i, IEEE80211_CHAN_G, IEEE80211_CHAN_G | IEEE80211_CHAN_HT20); if (htconf & 2) { i = canpromote(i, IEEE80211_CHAN_G, IEEE80211_CHAN_G | IEEE80211_CHAN_HT40D); i = canpromote(i, IEEE80211_CHAN_G, IEEE80211_CHAN_G | IEEE80211_CHAN_HT40U); } } if (chanmode != IFM_IEEE80211_11A && (htconf & 1)) { i = canpromote(i, IEEE80211_CHAN_A, IEEE80211_CHAN_A | IEEE80211_CHAN_HT20); if (htconf & 2) { i = canpromote(i, IEEE80211_CHAN_A, IEEE80211_CHAN_A | IEEE80211_CHAN_HT40D); i = canpromote(i, IEEE80211_CHAN_A, IEEE80211_CHAN_A | IEEE80211_CHAN_HT40U); } } return i; } static void mapfreq(struct ieee80211_channel *chan, int freq, int flags) { u_int i; for (i = 0; i < chaninfo->ic_nchans; i++) { const struct ieee80211_channel *c = &chaninfo->ic_chans[i]; if (c->ic_freq == freq && (c->ic_flags & flags) == flags) { if (flags == 0) { /* when ambiguous promote to ``best'' */ c = &chaninfo->ic_chans[promote(i)]; } *chan = *c; return; } } errx(1, "unknown/undefined frequency %u/0x%x", freq, flags); } static void mapchan(struct ieee80211_channel *chan, int ieee, int flags) { u_int i; for (i = 0; i < chaninfo->ic_nchans; i++) { const struct ieee80211_channel *c = &chaninfo->ic_chans[i]; if (c->ic_ieee == ieee && (c->ic_flags & flags) == flags) { if (flags == 0) { /* when ambiguous promote to ``best'' */ c = &chaninfo->ic_chans[promote(i)]; } *chan = *c; return; } } errx(1, "unknown/undefined channel number %d flags 0x%x", ieee, flags); } static const struct ieee80211_channel * getcurchan(int s) { if (gotcurchan) return &curchan; if (get80211(s, IEEE80211_IOC_CURCHAN, &curchan, sizeof(curchan)) < 0) { int val; /* fall back to legacy ioctl */ if (get80211val(s, IEEE80211_IOC_CHANNEL, &val) < 0) err(-1, "cannot figure out current channel"); getchaninfo(s); mapchan(&curchan, val, 0); } gotcurchan = 1; return &curchan; } static enum ieee80211_phymode chan2mode(const struct ieee80211_channel *c) { if (IEEE80211_IS_CHAN_VHTA(c)) return IEEE80211_MODE_VHT_5GHZ; if (IEEE80211_IS_CHAN_VHTG(c)) return IEEE80211_MODE_VHT_2GHZ; if (IEEE80211_IS_CHAN_HTA(c)) return IEEE80211_MODE_11NA; if (IEEE80211_IS_CHAN_HTG(c)) return IEEE80211_MODE_11NG; if (IEEE80211_IS_CHAN_108A(c)) return IEEE80211_MODE_TURBO_A; if (IEEE80211_IS_CHAN_108G(c)) return IEEE80211_MODE_TURBO_G; if (IEEE80211_IS_CHAN_ST(c)) return IEEE80211_MODE_STURBO_A; if (IEEE80211_IS_CHAN_FHSS(c)) return IEEE80211_MODE_FH; if (IEEE80211_IS_CHAN_HALF(c)) return IEEE80211_MODE_HALF; if (IEEE80211_IS_CHAN_QUARTER(c)) return IEEE80211_MODE_QUARTER; if (IEEE80211_IS_CHAN_A(c)) return IEEE80211_MODE_11A; if (IEEE80211_IS_CHAN_ANYG(c)) return IEEE80211_MODE_11G; if (IEEE80211_IS_CHAN_B(c)) return IEEE80211_MODE_11B; return IEEE80211_MODE_AUTO; } static void getroam(int s) { if (gotroam) 
return; if (get80211(s, IEEE80211_IOC_ROAM, &roamparams, sizeof(roamparams)) < 0) err(1, "unable to get roaming parameters"); gotroam = 1; } static void setroam_cb(int s, void *arg) { struct ieee80211_roamparams_req *roam = arg; set80211(s, IEEE80211_IOC_ROAM, 0, sizeof(*roam), roam); } static void gettxparams(int s) { if (gottxparams) return; if (get80211(s, IEEE80211_IOC_TXPARAMS, &txparams, sizeof(txparams)) < 0) err(1, "unable to get transmit parameters"); gottxparams = 1; } static void settxparams_cb(int s, void *arg) { struct ieee80211_txparams_req *txp = arg; set80211(s, IEEE80211_IOC_TXPARAMS, 0, sizeof(*txp), txp); } static void getregdomain(int s) { if (gotregdomain) return; if (get80211(s, IEEE80211_IOC_REGDOMAIN, ®domain, sizeof(regdomain)) < 0) err(1, "unable to get regulatory domain info"); gotregdomain = 1; } static void getdevcaps(int s, struct ieee80211_devcaps_req *dc) { if (get80211(s, IEEE80211_IOC_DEVCAPS, dc, IEEE80211_DEVCAPS_SPACE(dc)) < 0) err(1, "unable to get device capabilities"); } static void setregdomain_cb(int s, void *arg) { struct ieee80211_regdomain_req *req; struct ieee80211_regdomain *rd = arg; struct ieee80211_devcaps_req *dc; struct regdata *rdp = getregdata(); if (rd->country != NO_COUNTRY) { const struct country *cc; /* * Check current country seting to make sure it's * compatible with the new regdomain. If not, then * override it with any default country for this * SKU. If we cannot arrange a match, then abort. */ cc = lib80211_country_findbycc(rdp, rd->country); if (cc == NULL) errx(1, "unknown ISO country code %d", rd->country); if (cc->rd->sku != rd->regdomain) { const struct regdomain *rp; /* * Check if country is incompatible with regdomain. * To enable multiple regdomains for a country code * we permit a mismatch between the regdomain and * the country's associated regdomain when the * regdomain is setup w/o a default country. For * example, US is bound to the FCC regdomain but * we allow US to be combined with FCC3 because FCC3 * has not default country. This allows bogus * combinations like FCC3+DK which are resolved when * constructing the channel list by deferring to the * regdomain to construct the channel list. */ rp = lib80211_regdomain_findbysku(rdp, rd->regdomain); if (rp == NULL) errx(1, "country %s (%s) is not usable with " "regdomain %d", cc->isoname, cc->name, rd->regdomain); else if (rp->cc != NULL && rp->cc != cc) errx(1, "country %s (%s) is not usable with " "regdomain %s", cc->isoname, cc->name, rp->name); } } /* * Fetch the device capabilities and calculate the * full set of netbands for which we request a new * channel list be constructed. Once that's done we * push the regdomain info + channel list to the kernel. 
*/ dc = malloc(IEEE80211_DEVCAPS_SIZE(MAXCHAN)); if (dc == NULL) errx(1, "no space for device capabilities"); dc->dc_chaninfo.ic_nchans = MAXCHAN; getdevcaps(s, dc); #if 0 if (verbose) { printf("drivercaps: 0x%x\n", dc->dc_drivercaps); printf("cryptocaps: 0x%x\n", dc->dc_cryptocaps); printf("htcaps : 0x%x\n", dc->dc_htcaps); printf("vhtcaps : 0x%x\n", dc->dc_vhtcaps); #if 0 memcpy(chaninfo, &dc->dc_chaninfo, IEEE80211_CHANINFO_SPACE(&dc->dc_chaninfo)); print_channels(s, &dc->dc_chaninfo, 1/*allchans*/, 1/*verbose*/); #endif } #endif req = malloc(IEEE80211_REGDOMAIN_SIZE(dc->dc_chaninfo.ic_nchans)); if (req == NULL) errx(1, "no space for regdomain request"); req->rd = *rd; regdomain_makechannels(req, dc); if (verbose) { LINE_INIT(':'); print_regdomain(rd, 1/*verbose*/); LINE_BREAK(); /* blech, reallocate channel list for new data */ if (chaninfo != NULL) free(chaninfo); chaninfo = malloc(IEEE80211_CHANINFO_SPACE(&req->chaninfo)); if (chaninfo == NULL) errx(1, "no space for channel list"); memcpy(chaninfo, &req->chaninfo, IEEE80211_CHANINFO_SPACE(&req->chaninfo)); print_channels(s, &req->chaninfo, 1/*allchans*/, 1/*verbose*/); } if (req->chaninfo.ic_nchans == 0) errx(1, "no channels calculated"); set80211(s, IEEE80211_IOC_REGDOMAIN, 0, IEEE80211_REGDOMAIN_SPACE(req), req); free(req); free(dc); } static int ieee80211_mhz2ieee(int freq, int flags) { struct ieee80211_channel chan; mapfreq(&chan, freq, flags); return chan.ic_ieee; } static int isanyarg(const char *arg) { return (strncmp(arg, "-", 1) == 0 || strncasecmp(arg, "any", 3) == 0 || strncasecmp(arg, "off", 3) == 0); } static void set80211ssid(const char *val, int d, int s, const struct afswtch *rafp) { int ssid; int len; u_int8_t data[IEEE80211_NWID_LEN]; ssid = 0; len = strlen(val); if (len > 2 && isdigit((int)val[0]) && val[1] == ':') { ssid = atoi(val)-1; val += 2; } bzero(data, sizeof(data)); len = sizeof(data); if (get_string(val, NULL, data, &len) == NULL) exit(1); set80211(s, IEEE80211_IOC_SSID, ssid, len, data); } static void set80211meshid(const char *val, int d, int s, const struct afswtch *rafp) { int len; u_int8_t data[IEEE80211_NWID_LEN]; memset(data, 0, sizeof(data)); len = sizeof(data); if (get_string(val, NULL, data, &len) == NULL) exit(1); set80211(s, IEEE80211_IOC_MESH_ID, 0, len, data); } static void set80211stationname(const char *val, int d, int s, const struct afswtch *rafp) { int len; u_int8_t data[33]; bzero(data, sizeof(data)); len = sizeof(data); get_string(val, NULL, data, &len); set80211(s, IEEE80211_IOC_STATIONNAME, 0, len, data); } /* * Parse a channel specification for attributes/flags. * The syntax is: * freq/xx channel width (5,10,20,40,40+,40-) * freq:mode channel mode (a,b,g,h,n,t,s,d) * * These can be combined in either order; e.g. 2437:ng/40. * Modes are case insensitive. * * The result is not validated here; it's assumed to be * checked against the channel table fetched from the kernel. 
*/ static int getchannelflags(const char *val, int freq) { #define _CHAN_HT 0x80000000 const char *cp; int flags; int is_vht = 0; flags = 0; cp = strchr(val, ':'); if (cp != NULL) { for (cp++; isalpha((int) *cp); cp++) { /* accept mixed case */ int c = *cp; if (isupper(c)) c = tolower(c); switch (c) { case 'a': /* 802.11a */ flags |= IEEE80211_CHAN_A; break; case 'b': /* 802.11b */ flags |= IEEE80211_CHAN_B; break; case 'g': /* 802.11g */ flags |= IEEE80211_CHAN_G; break; case 'v': /* vht: 802.11ac */ is_vht = 1; /* Fallthrough */ case 'h': /* ht = 802.11n */ case 'n': /* 802.11n */ flags |= _CHAN_HT; /* NB: private */ break; case 'd': /* dt = Atheros Dynamic Turbo */ flags |= IEEE80211_CHAN_TURBO; break; case 't': /* ht, dt, st, t */ /* dt and unadorned t specify Dynamic Turbo */ if ((flags & (IEEE80211_CHAN_STURBO|_CHAN_HT)) == 0) flags |= IEEE80211_CHAN_TURBO; break; case 's': /* st = Atheros Static Turbo */ flags |= IEEE80211_CHAN_STURBO; break; default: errx(-1, "%s: Invalid channel attribute %c\n", val, *cp); } } } cp = strchr(val, '/'); if (cp != NULL) { char *ep; u_long cw = strtoul(cp+1, &ep, 10); switch (cw) { case 5: flags |= IEEE80211_CHAN_QUARTER; break; case 10: flags |= IEEE80211_CHAN_HALF; break; case 20: /* NB: this may be removed below */ flags |= IEEE80211_CHAN_HT20; break; case 40: case 80: case 160: /* Handle the 80/160 VHT flag */ if (cw == 80) flags |= IEEE80211_CHAN_VHT80; else if (cw == 160) flags |= IEEE80211_CHAN_VHT160; /* Fallthrough */ if (ep != NULL && *ep == '+') flags |= IEEE80211_CHAN_HT40U; else if (ep != NULL && *ep == '-') flags |= IEEE80211_CHAN_HT40D; break; default: errx(-1, "%s: Invalid channel width\n", val); } } /* * Cleanup specifications. */ if ((flags & _CHAN_HT) == 0) { /* * If user specified freq/20 or freq/40 quietly remove * HT cw attributes depending on channel use. To give * an explicit 20/40 width for an HT channel you must * indicate it is an HT channel since all HT channels * are also usable for legacy operation; e.g. freq:n/40. */ flags &= ~IEEE80211_CHAN_HT; flags &= ~IEEE80211_CHAN_VHT; } else { /* * Remove private indicator that this is an HT channel * and if no explicit channel width has been given * provide the default settings. */ flags &= ~_CHAN_HT; if ((flags & IEEE80211_CHAN_HT) == 0) { struct ieee80211_channel chan; /* * Consult the channel list to see if we can use * HT40+ or HT40- (if both the map routines choose). */ if (freq > 255) mapfreq(&chan, freq, 0); else mapchan(&chan, freq, 0); flags |= (chan.ic_flags & IEEE80211_CHAN_HT); } /* * If VHT is enabled, then also set the VHT flag and the * relevant channel up/down. */ if (is_vht && (flags & IEEE80211_CHAN_HT)) { /* * XXX yes, maybe we should just have VHT, and reuse * HT20/HT40U/HT40D */ if (flags & IEEE80211_CHAN_VHT80) ; else if (flags & IEEE80211_CHAN_HT20) flags |= IEEE80211_CHAN_VHT20; else if (flags & IEEE80211_CHAN_HT40U) flags |= IEEE80211_CHAN_VHT40U; else if (flags & IEEE80211_CHAN_HT40D) flags |= IEEE80211_CHAN_VHT40D; } } return flags; #undef _CHAN_HT } static void getchannel(int s, struct ieee80211_channel *chan, const char *val) { int v, flags; char *eptr; memset(chan, 0, sizeof(*chan)); if (isanyarg(val)) { chan->ic_freq = IEEE80211_CHAN_ANY; return; } getchaninfo(s); errno = 0; v = strtol(val, &eptr, 10); if (val[0] == '\0' || val == eptr || errno == ERANGE || /* channel may be suffixed with nothing, :flag, or /width */ (eptr[0] != '\0' && eptr[0] != ':' && eptr[0] != '/')) errx(1, "invalid channel specification%s", errno == ERANGE ? 
" (out of range)" : ""); flags = getchannelflags(val, v); if (v > 255) { /* treat as frequency */ mapfreq(chan, v, flags); } else { mapchan(chan, v, flags); } } static void set80211channel(const char *val, int d, int s, const struct afswtch *rafp) { struct ieee80211_channel chan; getchannel(s, &chan, val); set80211(s, IEEE80211_IOC_CURCHAN, 0, sizeof(chan), &chan); } static void set80211chanswitch(const char *val, int d, int s, const struct afswtch *rafp) { struct ieee80211_chanswitch_req csr; getchannel(s, &csr.csa_chan, val); csr.csa_mode = 1; csr.csa_count = 5; set80211(s, IEEE80211_IOC_CHANSWITCH, 0, sizeof(csr), &csr); } static void set80211authmode(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "none") == 0) { mode = IEEE80211_AUTH_NONE; } else if (strcasecmp(val, "open") == 0) { mode = IEEE80211_AUTH_OPEN; } else if (strcasecmp(val, "shared") == 0) { mode = IEEE80211_AUTH_SHARED; } else if (strcasecmp(val, "8021x") == 0) { mode = IEEE80211_AUTH_8021X; } else if (strcasecmp(val, "wpa") == 0) { mode = IEEE80211_AUTH_WPA; } else { errx(1, "unknown authmode"); } set80211(s, IEEE80211_IOC_AUTHMODE, mode, 0, NULL); } static void set80211powersavemode(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "off") == 0) { mode = IEEE80211_POWERSAVE_OFF; } else if (strcasecmp(val, "on") == 0) { mode = IEEE80211_POWERSAVE_ON; } else if (strcasecmp(val, "cam") == 0) { mode = IEEE80211_POWERSAVE_CAM; } else if (strcasecmp(val, "psp") == 0) { mode = IEEE80211_POWERSAVE_PSP; } else if (strcasecmp(val, "psp-cam") == 0) { mode = IEEE80211_POWERSAVE_PSP_CAM; } else { errx(1, "unknown powersavemode"); } set80211(s, IEEE80211_IOC_POWERSAVE, mode, 0, NULL); } static void set80211powersave(const char *val, int d, int s, const struct afswtch *rafp) { if (d == 0) set80211(s, IEEE80211_IOC_POWERSAVE, IEEE80211_POWERSAVE_OFF, 0, NULL); else set80211(s, IEEE80211_IOC_POWERSAVE, IEEE80211_POWERSAVE_ON, 0, NULL); } static void set80211powersavesleep(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_POWERSAVESLEEP, atoi(val), 0, NULL); } static void set80211wepmode(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "off") == 0) { mode = IEEE80211_WEP_OFF; } else if (strcasecmp(val, "on") == 0) { mode = IEEE80211_WEP_ON; } else if (strcasecmp(val, "mixed") == 0) { mode = IEEE80211_WEP_MIXED; } else { errx(1, "unknown wep mode"); } set80211(s, IEEE80211_IOC_WEP, mode, 0, NULL); } static void set80211wep(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_WEP, d, 0, NULL); } static int isundefarg(const char *arg) { return (strcmp(arg, "-") == 0 || strncasecmp(arg, "undef", 5) == 0); } static void set80211weptxkey(const char *val, int d, int s, const struct afswtch *rafp) { if (isundefarg(val)) set80211(s, IEEE80211_IOC_WEPTXKEY, IEEE80211_KEYIX_NONE, 0, NULL); else set80211(s, IEEE80211_IOC_WEPTXKEY, atoi(val)-1, 0, NULL); } static void set80211wepkey(const char *val, int d, int s, const struct afswtch *rafp) { int key = 0; int len; u_int8_t data[IEEE80211_KEYBUF_SIZE]; if (isdigit((int)val[0]) && val[1] == ':') { key = atoi(val)-1; val += 2; } bzero(data, sizeof(data)); len = sizeof(data); get_string(val, NULL, data, &len); set80211(s, IEEE80211_IOC_WEPKEY, key, len, data); } /* * This function is purely a NetBSD compatibility interface. 
The NetBSD * interface is too inflexible, but it's there so we'll support it since * it's not all that hard. */ static void set80211nwkey(const char *val, int d, int s, const struct afswtch *rafp) { int txkey; int i, len; u_int8_t data[IEEE80211_KEYBUF_SIZE]; set80211(s, IEEE80211_IOC_WEP, IEEE80211_WEP_ON, 0, NULL); if (isdigit((int)val[0]) && val[1] == ':') { txkey = val[0]-'0'-1; val += 2; for (i = 0; i < 4; i++) { bzero(data, sizeof(data)); len = sizeof(data); val = get_string(val, ",", data, &len); if (val == NULL) exit(1); set80211(s, IEEE80211_IOC_WEPKEY, i, len, data); } } else { bzero(data, sizeof(data)); len = sizeof(data); get_string(val, NULL, data, &len); txkey = 0; set80211(s, IEEE80211_IOC_WEPKEY, 0, len, data); bzero(data, sizeof(data)); for (i = 1; i < 4; i++) set80211(s, IEEE80211_IOC_WEPKEY, i, 0, data); } set80211(s, IEEE80211_IOC_WEPTXKEY, txkey, 0, NULL); } static void set80211rtsthreshold(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_RTSTHRESHOLD, isundefarg(val) ? IEEE80211_RTS_MAX : atoi(val), 0, NULL); } static void set80211protmode(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "off") == 0) { mode = IEEE80211_PROTMODE_OFF; } else if (strcasecmp(val, "cts") == 0) { mode = IEEE80211_PROTMODE_CTS; } else if (strncasecmp(val, "rtscts", 3) == 0) { mode = IEEE80211_PROTMODE_RTSCTS; } else { errx(1, "unknown protection mode"); } set80211(s, IEEE80211_IOC_PROTMODE, mode, 0, NULL); } static void set80211htprotmode(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "off") == 0) { mode = IEEE80211_PROTMODE_OFF; } else if (strncasecmp(val, "rts", 3) == 0) { mode = IEEE80211_PROTMODE_RTSCTS; } else { errx(1, "unknown protection mode"); } set80211(s, IEEE80211_IOC_HTPROTMODE, mode, 0, NULL); } static void set80211txpower(const char *val, int d, int s, const struct afswtch *rafp) { double v = atof(val); int txpow; txpow = (int) (2*v); if (txpow != 2*v) errx(-1, "invalid tx power (must be .5 dBm units)"); set80211(s, IEEE80211_IOC_TXPOWER, txpow, 0, NULL); } #define IEEE80211_ROAMING_DEVICE 0 #define IEEE80211_ROAMING_AUTO 1 #define IEEE80211_ROAMING_MANUAL 2 static void set80211roaming(const char *val, int d, int s, const struct afswtch *rafp) { int mode; if (strcasecmp(val, "device") == 0) { mode = IEEE80211_ROAMING_DEVICE; } else if (strcasecmp(val, "auto") == 0) { mode = IEEE80211_ROAMING_AUTO; } else if (strcasecmp(val, "manual") == 0) { mode = IEEE80211_ROAMING_MANUAL; } else { errx(1, "unknown roaming mode"); } set80211(s, IEEE80211_IOC_ROAMING, mode, 0, NULL); } static void set80211wme(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_WME, d, 0, NULL); } static void set80211hidessid(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_HIDESSID, d, 0, NULL); } static void set80211apbridge(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_APBRIDGE, d, 0, NULL); } static void set80211fastframes(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_FF, d, 0, NULL); } static void set80211dturbo(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_TURBOP, d, 0, NULL); } static void set80211chanlist(const char *val, int d, int s, const struct afswtch *rafp) { struct ieee80211req_chanlist chanlist; char *temp, *cp, *tp; temp = malloc(strlen(val) + 1); if (temp == NULL) errx(1, "malloc 
failed"); strcpy(temp, val); memset(&chanlist, 0, sizeof(chanlist)); cp = temp; for (;;) { int first, last, f, c; tp = strchr(cp, ','); if (tp != NULL) *tp++ = '\0'; switch (sscanf(cp, "%u-%u", &first, &last)) { case 1: if (first > IEEE80211_CHAN_MAX) errx(-1, "channel %u out of range, max %u", first, IEEE80211_CHAN_MAX); setbit(chanlist.ic_channels, first); break; case 2: if (first > IEEE80211_CHAN_MAX) errx(-1, "channel %u out of range, max %u", first, IEEE80211_CHAN_MAX); if (last > IEEE80211_CHAN_MAX) errx(-1, "channel %u out of range, max %u", last, IEEE80211_CHAN_MAX); if (first > last) errx(-1, "void channel range, %u > %u", first, last); for (f = first; f <= last; f++) setbit(chanlist.ic_channels, f); break; } if (tp == NULL) break; c = *tp; while (isspace(c)) tp++; if (!isdigit(c)) break; cp = tp; } set80211(s, IEEE80211_IOC_CHANLIST, 0, sizeof(chanlist), &chanlist); free(temp); } static void set80211bssid(const char *val, int d, int s, const struct afswtch *rafp) { if (!isanyarg(val)) { char *temp; struct sockaddr_dl sdl; temp = malloc(strlen(val) + 2); /* ':' and '\0' */ if (temp == NULL) errx(1, "malloc failed"); temp[0] = ':'; strcpy(temp + 1, val); sdl.sdl_len = sizeof(sdl); link_addr(temp, &sdl); free(temp); if (sdl.sdl_alen != IEEE80211_ADDR_LEN) errx(1, "malformed link-level address"); set80211(s, IEEE80211_IOC_BSSID, 0, IEEE80211_ADDR_LEN, LLADDR(&sdl)); } else { uint8_t zerobssid[IEEE80211_ADDR_LEN]; memset(zerobssid, 0, sizeof(zerobssid)); set80211(s, IEEE80211_IOC_BSSID, 0, IEEE80211_ADDR_LEN, zerobssid); } } static int getac(const char *ac) { if (strcasecmp(ac, "ac_be") == 0 || strcasecmp(ac, "be") == 0) return WME_AC_BE; if (strcasecmp(ac, "ac_bk") == 0 || strcasecmp(ac, "bk") == 0) return WME_AC_BK; if (strcasecmp(ac, "ac_vi") == 0 || strcasecmp(ac, "vi") == 0) return WME_AC_VI; if (strcasecmp(ac, "ac_vo") == 0 || strcasecmp(ac, "vo") == 0) return WME_AC_VO; errx(1, "unknown wme access class %s", ac); } static DECL_CMD_FUNC2(set80211cwmin, ac, val) { set80211(s, IEEE80211_IOC_WME_CWMIN, atoi(val), getac(ac), NULL); } static DECL_CMD_FUNC2(set80211cwmax, ac, val) { set80211(s, IEEE80211_IOC_WME_CWMAX, atoi(val), getac(ac), NULL); } static DECL_CMD_FUNC2(set80211aifs, ac, val) { set80211(s, IEEE80211_IOC_WME_AIFS, atoi(val), getac(ac), NULL); } static DECL_CMD_FUNC2(set80211txoplimit, ac, val) { set80211(s, IEEE80211_IOC_WME_TXOPLIMIT, atoi(val), getac(ac), NULL); } static DECL_CMD_FUNC(set80211acm, ac, d) { set80211(s, IEEE80211_IOC_WME_ACM, 1, getac(ac), NULL); } static DECL_CMD_FUNC(set80211noacm, ac, d) { set80211(s, IEEE80211_IOC_WME_ACM, 0, getac(ac), NULL); } static DECL_CMD_FUNC(set80211ackpolicy, ac, d) { set80211(s, IEEE80211_IOC_WME_ACKPOLICY, 1, getac(ac), NULL); } static DECL_CMD_FUNC(set80211noackpolicy, ac, d) { set80211(s, IEEE80211_IOC_WME_ACKPOLICY, 0, getac(ac), NULL); } static DECL_CMD_FUNC2(set80211bsscwmin, ac, val) { set80211(s, IEEE80211_IOC_WME_CWMIN, atoi(val), getac(ac)|IEEE80211_WMEPARAM_BSS, NULL); } static DECL_CMD_FUNC2(set80211bsscwmax, ac, val) { set80211(s, IEEE80211_IOC_WME_CWMAX, atoi(val), getac(ac)|IEEE80211_WMEPARAM_BSS, NULL); } static DECL_CMD_FUNC2(set80211bssaifs, ac, val) { set80211(s, IEEE80211_IOC_WME_AIFS, atoi(val), getac(ac)|IEEE80211_WMEPARAM_BSS, NULL); } static DECL_CMD_FUNC2(set80211bsstxoplimit, ac, val) { set80211(s, IEEE80211_IOC_WME_TXOPLIMIT, atoi(val), getac(ac)|IEEE80211_WMEPARAM_BSS, NULL); } static DECL_CMD_FUNC(set80211dtimperiod, val, d) { set80211(s, IEEE80211_IOC_DTIM_PERIOD, atoi(val), 0, NULL); } 
static DECL_CMD_FUNC(set80211bintval, val, d) { set80211(s, IEEE80211_IOC_BEACON_INTERVAL, atoi(val), 0, NULL); } static void set80211macmac(int s, int op, const char *val) { char *temp; struct sockaddr_dl sdl; temp = malloc(strlen(val) + 2); /* ':' and '\0' */ if (temp == NULL) errx(1, "malloc failed"); temp[0] = ':'; strcpy(temp + 1, val); sdl.sdl_len = sizeof(sdl); link_addr(temp, &sdl); free(temp); if (sdl.sdl_alen != IEEE80211_ADDR_LEN) errx(1, "malformed link-level address"); set80211(s, op, 0, IEEE80211_ADDR_LEN, LLADDR(&sdl)); } static DECL_CMD_FUNC(set80211addmac, val, d) { set80211macmac(s, IEEE80211_IOC_ADDMAC, val); } static DECL_CMD_FUNC(set80211delmac, val, d) { set80211macmac(s, IEEE80211_IOC_DELMAC, val); } static DECL_CMD_FUNC(set80211kickmac, val, d) { char *temp; struct sockaddr_dl sdl; struct ieee80211req_mlme mlme; temp = malloc(strlen(val) + 2); /* ':' and '\0' */ if (temp == NULL) errx(1, "malloc failed"); temp[0] = ':'; strcpy(temp + 1, val); sdl.sdl_len = sizeof(sdl); link_addr(temp, &sdl); free(temp); if (sdl.sdl_alen != IEEE80211_ADDR_LEN) errx(1, "malformed link-level address"); memset(&mlme, 0, sizeof(mlme)); mlme.im_op = IEEE80211_MLME_DEAUTH; mlme.im_reason = IEEE80211_REASON_AUTH_EXPIRE; memcpy(mlme.im_macaddr, LLADDR(&sdl), IEEE80211_ADDR_LEN); set80211(s, IEEE80211_IOC_MLME, 0, sizeof(mlme), &mlme); } static DECL_CMD_FUNC(set80211maccmd, val, d) { set80211(s, IEEE80211_IOC_MACCMD, d, 0, NULL); } static void set80211meshrtmac(int s, int req, const char *val) { char *temp; struct sockaddr_dl sdl; temp = malloc(strlen(val) + 2); /* ':' and '\0' */ if (temp == NULL) errx(1, "malloc failed"); temp[0] = ':'; strcpy(temp + 1, val); sdl.sdl_len = sizeof(sdl); link_addr(temp, &sdl); free(temp); if (sdl.sdl_alen != IEEE80211_ADDR_LEN) errx(1, "malformed link-level address"); set80211(s, IEEE80211_IOC_MESH_RTCMD, req, IEEE80211_ADDR_LEN, LLADDR(&sdl)); } static DECL_CMD_FUNC(set80211addmeshrt, val, d) { set80211meshrtmac(s, IEEE80211_MESH_RTCMD_ADD, val); } static DECL_CMD_FUNC(set80211delmeshrt, val, d) { set80211meshrtmac(s, IEEE80211_MESH_RTCMD_DELETE, val); } static DECL_CMD_FUNC(set80211meshrtcmd, val, d) { set80211(s, IEEE80211_IOC_MESH_RTCMD, d, 0, NULL); } static DECL_CMD_FUNC(set80211hwmprootmode, val, d) { int mode; if (strcasecmp(val, "normal") == 0) mode = IEEE80211_HWMP_ROOTMODE_NORMAL; else if (strcasecmp(val, "proactive") == 0) mode = IEEE80211_HWMP_ROOTMODE_PROACTIVE; else if (strcasecmp(val, "rann") == 0) mode = IEEE80211_HWMP_ROOTMODE_RANN; else mode = IEEE80211_HWMP_ROOTMODE_DISABLED; set80211(s, IEEE80211_IOC_HWMP_ROOTMODE, mode, 0, NULL); } static DECL_CMD_FUNC(set80211hwmpmaxhops, val, d) { set80211(s, IEEE80211_IOC_HWMP_MAXHOPS, atoi(val), 0, NULL); } static void set80211pureg(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_PUREG, d, 0, NULL); } static void set80211quiet(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_QUIET, d, 0, NULL); } static DECL_CMD_FUNC(set80211quietperiod, val, d) { set80211(s, IEEE80211_IOC_QUIET_PERIOD, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211quietcount, val, d) { set80211(s, IEEE80211_IOC_QUIET_COUNT, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211quietduration, val, d) { set80211(s, IEEE80211_IOC_QUIET_DUR, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211quietoffset, val, d) { set80211(s, IEEE80211_IOC_QUIET_OFFSET, atoi(val), 0, NULL); } static void set80211bgscan(const char *val, int d, int s, const struct afswtch 
*rafp) { set80211(s, IEEE80211_IOC_BGSCAN, d, 0, NULL); } static DECL_CMD_FUNC(set80211bgscanidle, val, d) { set80211(s, IEEE80211_IOC_BGSCAN_IDLE, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211bgscanintvl, val, d) { set80211(s, IEEE80211_IOC_BGSCAN_INTERVAL, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211scanvalid, val, d) { set80211(s, IEEE80211_IOC_SCANVALID, atoi(val), 0, NULL); } /* * Parse an optional trailing specification of which netbands * to apply a parameter to. This is basically the same syntax * as used for channels but you can concatenate to specify * multiple. For example: * 14:abg apply to 11a, 11b, and 11g * 6:ht apply to 11na and 11ng * We don't make a big effort to catch silly things; this is * really a convenience mechanism. */ static int getmodeflags(const char *val) { const char *cp; int flags; flags = 0; cp = strchr(val, ':'); if (cp != NULL) { for (cp++; isalpha((int) *cp); cp++) { /* accept mixed case */ int c = *cp; if (isupper(c)) c = tolower(c); switch (c) { case 'a': /* 802.11a */ flags |= IEEE80211_CHAN_A; break; case 'b': /* 802.11b */ flags |= IEEE80211_CHAN_B; break; case 'g': /* 802.11g */ flags |= IEEE80211_CHAN_G; break; case 'n': /* 802.11n */ flags |= IEEE80211_CHAN_HT; break; case 'd': /* dt = Atheros Dynamic Turbo */ flags |= IEEE80211_CHAN_TURBO; break; case 't': /* ht, dt, st, t */ /* dt and unadorned t specify Dynamic Turbo */ if ((flags & (IEEE80211_CHAN_STURBO|IEEE80211_CHAN_HT)) == 0) flags |= IEEE80211_CHAN_TURBO; break; case 's': /* st = Atheros Static Turbo */ flags |= IEEE80211_CHAN_STURBO; break; case 'h': /* 1/2-width channels */ flags |= IEEE80211_CHAN_HALF; break; case 'q': /* 1/4-width channels */ flags |= IEEE80211_CHAN_QUARTER; break; case 'v': /* XXX set HT too? */ flags |= IEEE80211_CHAN_VHT; break; default: errx(-1, "%s: Invalid mode attribute %c\n", val, *cp); } } } return flags; } #define IEEE80211_CHAN_HTA (IEEE80211_CHAN_HT|IEEE80211_CHAN_5GHZ) #define IEEE80211_CHAN_HTG (IEEE80211_CHAN_HT|IEEE80211_CHAN_2GHZ) #define _APPLY(_flags, _base, _param, _v) do { \ if (_flags & IEEE80211_CHAN_HT) { \ if ((_flags & (IEEE80211_CHAN_5GHZ|IEEE80211_CHAN_2GHZ)) == 0) {\ _base.params[IEEE80211_MODE_11NA]._param = _v; \ _base.params[IEEE80211_MODE_11NG]._param = _v; \ } else if (_flags & IEEE80211_CHAN_5GHZ) \ _base.params[IEEE80211_MODE_11NA]._param = _v; \ else \ _base.params[IEEE80211_MODE_11NG]._param = _v; \ } \ if (_flags & IEEE80211_CHAN_TURBO) { \ if ((_flags & (IEEE80211_CHAN_5GHZ|IEEE80211_CHAN_2GHZ)) == 0) {\ _base.params[IEEE80211_MODE_TURBO_A]._param = _v; \ _base.params[IEEE80211_MODE_TURBO_G]._param = _v; \ } else if (_flags & IEEE80211_CHAN_5GHZ) \ _base.params[IEEE80211_MODE_TURBO_A]._param = _v; \ else \ _base.params[IEEE80211_MODE_TURBO_G]._param = _v; \ } \ if (_flags & IEEE80211_CHAN_STURBO) \ _base.params[IEEE80211_MODE_STURBO_A]._param = _v; \ if ((_flags & IEEE80211_CHAN_A) == IEEE80211_CHAN_A) \ _base.params[IEEE80211_MODE_11A]._param = _v; \ if ((_flags & IEEE80211_CHAN_G) == IEEE80211_CHAN_G) \ _base.params[IEEE80211_MODE_11G]._param = _v; \ if ((_flags & IEEE80211_CHAN_B) == IEEE80211_CHAN_B) \ _base.params[IEEE80211_MODE_11B]._param = _v; \ if (_flags & IEEE80211_CHAN_HALF) \ _base.params[IEEE80211_MODE_HALF]._param = _v; \ if (_flags & IEEE80211_CHAN_QUARTER) \ _base.params[IEEE80211_MODE_QUARTER]._param = _v; \ } while (0) #define _APPLY1(_flags, _base, _param, _v) do { \ if (_flags & IEEE80211_CHAN_HT) { \ if (_flags & IEEE80211_CHAN_5GHZ) \ _base.params[IEEE80211_MODE_11NA]._param = _v; \ else \ 
_base.params[IEEE80211_MODE_11NG]._param = _v; \ } else if ((_flags & IEEE80211_CHAN_108A) == IEEE80211_CHAN_108A) \ _base.params[IEEE80211_MODE_TURBO_A]._param = _v; \ else if ((_flags & IEEE80211_CHAN_108G) == IEEE80211_CHAN_108G) \ _base.params[IEEE80211_MODE_TURBO_G]._param = _v; \ else if ((_flags & IEEE80211_CHAN_ST) == IEEE80211_CHAN_ST) \ _base.params[IEEE80211_MODE_STURBO_A]._param = _v; \ else if (_flags & IEEE80211_CHAN_HALF) \ _base.params[IEEE80211_MODE_HALF]._param = _v; \ else if (_flags & IEEE80211_CHAN_QUARTER) \ _base.params[IEEE80211_MODE_QUARTER]._param = _v; \ else if ((_flags & IEEE80211_CHAN_A) == IEEE80211_CHAN_A) \ _base.params[IEEE80211_MODE_11A]._param = _v; \ else if ((_flags & IEEE80211_CHAN_G) == IEEE80211_CHAN_G) \ _base.params[IEEE80211_MODE_11G]._param = _v; \ else if ((_flags & IEEE80211_CHAN_B) == IEEE80211_CHAN_B) \ _base.params[IEEE80211_MODE_11B]._param = _v; \ } while (0) #define _APPLY_RATE(_flags, _base, _param, _v) do { \ if (_flags & IEEE80211_CHAN_HT) { \ (_v) = (_v / 2) | IEEE80211_RATE_MCS; \ } \ _APPLY(_flags, _base, _param, _v); \ } while (0) #define _APPLY_RATE1(_flags, _base, _param, _v) do { \ if (_flags & IEEE80211_CHAN_HT) { \ (_v) = (_v / 2) | IEEE80211_RATE_MCS; \ } \ _APPLY1(_flags, _base, _param, _v); \ } while (0) static DECL_CMD_FUNC(set80211roamrssi, val, d) { double v = atof(val); int rssi, flags; rssi = (int) (2*v); if (rssi != 2*v) errx(-1, "invalid rssi (must be .5 dBm units)"); flags = getmodeflags(val); getroam(s); if (flags == 0) { /* NB: no flags => current channel */ flags = getcurchan(s)->ic_flags; _APPLY1(flags, roamparams, rssi, rssi); } else _APPLY(flags, roamparams, rssi, rssi); callback_register(setroam_cb, &roamparams); } static int getrate(const char *val, const char *tag) { double v = atof(val); int rate; rate = (int) (2*v); if (rate != 2*v) errx(-1, "invalid %s rate (must be .5 Mb/s units)", tag); return rate; /* NB: returns 2x the specified value */ } static DECL_CMD_FUNC(set80211roamrate, val, d) { int rate, flags; rate = getrate(val, "roam"); flags = getmodeflags(val); getroam(s); if (flags == 0) { /* NB: no flags => current channel */ flags = getcurchan(s)->ic_flags; _APPLY_RATE1(flags, roamparams, rate, rate); } else _APPLY_RATE(flags, roamparams, rate, rate); callback_register(setroam_cb, &roamparams); } static DECL_CMD_FUNC(set80211mcastrate, val, d) { int rate, flags; rate = getrate(val, "mcast"); flags = getmodeflags(val); gettxparams(s); if (flags == 0) { /* NB: no flags => current channel */ flags = getcurchan(s)->ic_flags; _APPLY_RATE1(flags, txparams, mcastrate, rate); } else _APPLY_RATE(flags, txparams, mcastrate, rate); callback_register(settxparams_cb, &txparams); } static DECL_CMD_FUNC(set80211mgtrate, val, d) { int rate, flags; rate = getrate(val, "mgmt"); flags = getmodeflags(val); gettxparams(s); if (flags == 0) { /* NB: no flags => current channel */ flags = getcurchan(s)->ic_flags; _APPLY_RATE1(flags, txparams, mgmtrate, rate); } else _APPLY_RATE(flags, txparams, mgmtrate, rate); callback_register(settxparams_cb, &txparams); } static DECL_CMD_FUNC(set80211ucastrate, val, d) { int flags; gettxparams(s); flags = getmodeflags(val); if (isanyarg(val)) { if (flags == 0) { /* NB: no flags => current channel */ flags = getcurchan(s)->ic_flags; _APPLY1(flags, txparams, ucastrate, IEEE80211_FIXED_RATE_NONE); } else _APPLY(flags, txparams, ucastrate, IEEE80211_FIXED_RATE_NONE); } else { int rate = getrate(val, "ucast"); if (flags == 0) { /* NB: no flags => current channel */ flags = 
getcurchan(s)->ic_flags; _APPLY_RATE1(flags, txparams, ucastrate, rate); } else _APPLY_RATE(flags, txparams, ucastrate, rate); } callback_register(settxparams_cb, &txparams); } static DECL_CMD_FUNC(set80211maxretry, val, d) { int v = atoi(val), flags; flags = getmodeflags(val); gettxparams(s); if (flags == 0) { /* NB: no flags => current channel */ flags = getcurchan(s)->ic_flags; _APPLY1(flags, txparams, maxretry, v); } else _APPLY(flags, txparams, maxretry, v); callback_register(settxparams_cb, &txparams); } #undef _APPLY_RATE #undef _APPLY #undef IEEE80211_CHAN_HTA #undef IEEE80211_CHAN_HTG static DECL_CMD_FUNC(set80211fragthreshold, val, d) { set80211(s, IEEE80211_IOC_FRAGTHRESHOLD, isundefarg(val) ? IEEE80211_FRAG_MAX : atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211bmissthreshold, val, d) { set80211(s, IEEE80211_IOC_BMISSTHRESHOLD, isundefarg(val) ? IEEE80211_HWBMISS_MAX : atoi(val), 0, NULL); } static void set80211burst(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_BURST, d, 0, NULL); } static void set80211doth(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_DOTH, d, 0, NULL); } static void set80211dfs(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_DFS, d, 0, NULL); } static void set80211shortgi(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_SHORTGI, d ? (IEEE80211_HTCAP_SHORTGI20 | IEEE80211_HTCAP_SHORTGI40) : 0, 0, NULL); } /* XXX 11ac density/size is different */ static void set80211ampdu(const char *val, int d, int s, const struct afswtch *rafp) { int ampdu; if (get80211val(s, IEEE80211_IOC_AMPDU, &ampdu) < 0) errx(-1, "cannot get AMPDU setting"); if (d < 0) { d = -d; ampdu &= ~d; } else ampdu |= d; set80211(s, IEEE80211_IOC_AMPDU, ampdu, 0, NULL); } static void set80211stbc(const char *val, int d, int s, const struct afswtch *rafp) { int stbc; if (get80211val(s, IEEE80211_IOC_STBC, &stbc) < 0) errx(-1, "cannot get STBC setting"); if (d < 0) { d = -d; stbc &= ~d; } else stbc |= d; set80211(s, IEEE80211_IOC_STBC, stbc, 0, NULL); } static void set80211ldpc(const char *val, int d, int s, const struct afswtch *rafp) { int ldpc; if (get80211val(s, IEEE80211_IOC_LDPC, &ldpc) < 0) errx(-1, "cannot get LDPC setting"); if (d < 0) { d = -d; ldpc &= ~d; } else ldpc |= d; set80211(s, IEEE80211_IOC_LDPC, ldpc, 0, NULL); } static DECL_CMD_FUNC(set80211ampdulimit, val, d) { int v; switch (atoi(val)) { case 8: case 8*1024: v = IEEE80211_HTCAP_MAXRXAMPDU_8K; break; case 16: case 16*1024: v = IEEE80211_HTCAP_MAXRXAMPDU_16K; break; case 32: case 32*1024: v = IEEE80211_HTCAP_MAXRXAMPDU_32K; break; case 64: case 64*1024: v = IEEE80211_HTCAP_MAXRXAMPDU_64K; break; default: errx(-1, "invalid A-MPDU limit %s", val); } set80211(s, IEEE80211_IOC_AMPDU_LIMIT, v, 0, NULL); } /* XXX 11ac density/size is different */ static DECL_CMD_FUNC(set80211ampdudensity, val, d) { int v; if (isanyarg(val) || strcasecmp(val, "na") == 0) v = IEEE80211_HTCAP_MPDUDENSITY_NA; else switch ((int)(atof(val)*4)) { case 0: v = IEEE80211_HTCAP_MPDUDENSITY_NA; break; case 1: v = IEEE80211_HTCAP_MPDUDENSITY_025; break; case 2: v = IEEE80211_HTCAP_MPDUDENSITY_05; break; case 4: v = IEEE80211_HTCAP_MPDUDENSITY_1; break; case 8: v = IEEE80211_HTCAP_MPDUDENSITY_2; break; case 16: v = IEEE80211_HTCAP_MPDUDENSITY_4; break; case 32: v = IEEE80211_HTCAP_MPDUDENSITY_8; break; case 64: v = IEEE80211_HTCAP_MPDUDENSITY_16; break; default: errx(-1, "invalid A-MPDU density %s", val); }
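/*
 * The AMPDU, STBC and LDPC handlers above all follow one read-modify-write
 * convention: a negative `d` clears the named capability bits and a positive
 * `d` sets them, with the current value fetched first so unrelated bits are
 * preserved. A minimal sketch of that convention factored into one helper;
 * set80211rmwflag() is hypothetical and not part of this file, while
 * get80211val() and set80211() are this file's own accessors (signatures
 * assumed from their call sites).
 */
static void
set80211rmwflag(int s, int ioc, int d)
{
	int v;

	if (get80211val(s, ioc, &v) < 0)	/* read the current bits */
		errx(-1, "cannot get current setting for ioctl %d", ioc);
	if (d < 0)
		v &= ~(-d);			/* negative d: clear bits */
	else
		v |= d;				/* positive d: set bits */
	set80211(s, ioc, v, 0, NULL);		/* write the result back */
}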
set80211(s, IEEE80211_IOC_AMPDU_DENSITY, v, 0, NULL); } static void set80211amsdu(const char *val, int d, int s, const struct afswtch *rafp) { int amsdu; if (get80211val(s, IEEE80211_IOC_AMSDU, &amsdu) < 0) err(-1, "cannot get AMSDU setting"); if (d < 0) { d = -d; amsdu &= ~d; } else amsdu |= d; set80211(s, IEEE80211_IOC_AMSDU, amsdu, 0, NULL); } static DECL_CMD_FUNC(set80211amsdulimit, val, d) { set80211(s, IEEE80211_IOC_AMSDU_LIMIT, atoi(val), 0, NULL); } static void set80211puren(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_PUREN, d, 0, NULL); } static void set80211htcompat(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_HTCOMPAT, d, 0, NULL); } static void set80211htconf(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_HTCONF, d, 0, NULL); htconf = d; } static void set80211dwds(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_DWDS, d, 0, NULL); } static void set80211inact(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_INACTIVITY, d, 0, NULL); } static void set80211tsn(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_TSN, d, 0, NULL); } static void set80211dotd(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_DOTD, d, 0, NULL); } static void set80211smps(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_SMPS, d, 0, NULL); } static void set80211rifs(const char *val, int d, int s, const struct afswtch *rafp) { set80211(s, IEEE80211_IOC_RIFS, d, 0, NULL); } static void set80211vhtconf(const char *val, int d, int s, const struct afswtch *rafp) { if (get80211val(s, IEEE80211_IOC_VHTCONF, &vhtconf) < 0) errx(-1, "cannot get VHT setting"); printf("%s: vhtconf=0x%08x, d=%d\n", __func__, vhtconf, d); if (d < 0) { d = -d; vhtconf &= ~d; } else vhtconf |= d; printf("%s: vhtconf is now 0x%08x\n", __func__, vhtconf); set80211(s, IEEE80211_IOC_VHTCONF, vhtconf, 0, NULL); } static DECL_CMD_FUNC(set80211tdmaslot, val, d) { set80211(s, IEEE80211_IOC_TDMA_SLOT, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211tdmaslotcnt, val, d) { set80211(s, IEEE80211_IOC_TDMA_SLOTCNT, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211tdmaslotlen, val, d) { set80211(s, IEEE80211_IOC_TDMA_SLOTLEN, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211tdmabintval, val, d) { set80211(s, IEEE80211_IOC_TDMA_BINTERVAL, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211meshttl, val, d) { set80211(s, IEEE80211_IOC_MESH_TTL, atoi(val), 0, NULL); } static DECL_CMD_FUNC(set80211meshforward, val, d) { set80211(s, IEEE80211_IOC_MESH_FWRD, d, 0, NULL); } static DECL_CMD_FUNC(set80211meshgate, val, d) { set80211(s, IEEE80211_IOC_MESH_GATE, d, 0, NULL); } static DECL_CMD_FUNC(set80211meshpeering, val, d) { set80211(s, IEEE80211_IOC_MESH_AP, d, 0, NULL); } static DECL_CMD_FUNC(set80211meshmetric, val, d) { char v[12]; strlcpy(v, val, sizeof(v)); set80211(s, IEEE80211_IOC_MESH_PR_METRIC, 0, 0, v); } static DECL_CMD_FUNC(set80211meshpath, val, d) { char v[12]; strlcpy(v, val, sizeof(v)); set80211(s, IEEE80211_IOC_MESH_PR_PATH, 0, 0, v); } static int regdomain_sort(const void *a, const void *b) { #define CHAN_ALL \ (IEEE80211_CHAN_ALLTURBO|IEEE80211_CHAN_HALF|IEEE80211_CHAN_QUARTER) const struct ieee80211_channel *ca = a; const struct ieee80211_channel *cb = b; return ca->ic_freq == cb->ic_freq ?
(ca->ic_flags & CHAN_ALL) - (cb->ic_flags & CHAN_ALL) : ca->ic_freq - cb->ic_freq; #undef CHAN_ALL } static const struct ieee80211_channel * chanlookup(const struct ieee80211_channel chans[], int nchans, int freq, int flags) { int i; flags &= IEEE80211_CHAN_ALLTURBO; for (i = 0; i < nchans; i++) { const struct ieee80211_channel *c = &chans[i]; if (c->ic_freq == freq && (c->ic_flags & IEEE80211_CHAN_ALLTURBO) == flags) return c; } return NULL; } static int chanfind(const struct ieee80211_channel chans[], int nchans, int flags) { int i; for (i = 0; i < nchans; i++) { const struct ieee80211_channel *c = &chans[i]; if ((c->ic_flags & flags) == flags) return 1; } return 0; } /* * Check channel compatibility. */ static int checkchan(const struct ieee80211req_chaninfo *avail, int freq, int flags) { flags &= ~REQ_FLAGS; /* * Check if exact channel is in the calibration table; * everything below is to deal with channels that we * want to include but that are not explicitly listed. */ if (chanlookup(avail->ic_chans, avail->ic_nchans, freq, flags) != NULL) return 1; if (flags & IEEE80211_CHAN_GSM) { /* * XXX GSM frequency mapping is handled in the kernel * so we cannot find them in the calibration table; * just accept the channel and the kernel will reject * the channel list if it's wrong. */ return 1; } /* * If this is a 1/2 or 1/4 width channel allow it if a full * width channel is present for this frequency, and the device * supports fractional channels on this band. This is a hack * that avoids bloating the calibration table; it may be better * by per-band attributes though (we are effectively calculating * this attribute by scanning the channel list ourself). */ if ((flags & (IEEE80211_CHAN_HALF | IEEE80211_CHAN_QUARTER)) == 0) return 0; if (chanlookup(avail->ic_chans, avail->ic_nchans, freq, flags &~ (IEEE80211_CHAN_HALF | IEEE80211_CHAN_QUARTER)) == NULL) return 0; if (flags & IEEE80211_CHAN_HALF) { return chanfind(avail->ic_chans, avail->ic_nchans, IEEE80211_CHAN_HALF | (flags & (IEEE80211_CHAN_2GHZ | IEEE80211_CHAN_5GHZ))); } else { return chanfind(avail->ic_chans, avail->ic_nchans, IEEE80211_CHAN_QUARTER | (flags & (IEEE80211_CHAN_2GHZ | IEEE80211_CHAN_5GHZ))); } } static void regdomain_addchans(struct ieee80211req_chaninfo *ci, const netband_head *bands, const struct ieee80211_regdomain *reg, uint32_t chanFlags, const struct ieee80211req_chaninfo *avail) { const struct netband *nb; const struct freqband *b; struct ieee80211_channel *c, *prev; int freq, hi_adj, lo_adj, channelSep; uint32_t flags; hi_adj = (chanFlags & IEEE80211_CHAN_HT40U) ? -20 : 0; lo_adj = (chanFlags & IEEE80211_CHAN_HT40D) ? 20 : 0; channelSep = (chanFlags & IEEE80211_CHAN_2GHZ) ? 0 : 40; LIST_FOREACH(nb, bands, next) { b = nb->band; if (verbose) { printf("%s:", __func__); printb(" chanFlags", chanFlags, IEEE80211_CHAN_BITS); printb(" bandFlags", nb->flags | b->flags, IEEE80211_CHAN_BITS); putchar('\n'); } prev = NULL; for (freq = b->freqStart + lo_adj; freq <= b->freqEnd + hi_adj; freq += b->chanSep) { /* * Construct flags for the new channel. We take * the attributes from the band descriptions except * for HT40 which is enabled generically (i.e. +/- * extension channel) in the band description and * then constrained according by channel separation. */ flags = nb->flags | b->flags; /* * VHT first - HT is a subset. * * XXX TODO: VHT80p80, VHT160 is not yet done. 
*/ if (flags & IEEE80211_CHAN_VHT) { if ((chanFlags & IEEE80211_CHAN_VHT20) && (flags & IEEE80211_CHAN_VHT20) == 0) { if (verbose) printf("%u: skip, not a " "VHT20 channel\n", freq); continue; } if ((chanFlags & IEEE80211_CHAN_VHT40) && (flags & IEEE80211_CHAN_VHT40) == 0) { if (verbose) printf("%u: skip, not a " "VHT40 channel\n", freq); continue; } if ((chanFlags & IEEE80211_CHAN_VHT80) && (flags & IEEE80211_CHAN_VHT80) == 0) { if (verbose) printf("%u: skip, not a " "VHT80 channel\n", freq); continue; } flags &= ~IEEE80211_CHAN_VHT; flags |= chanFlags & IEEE80211_CHAN_VHT; } /* Now, constrain HT */ if (flags & IEEE80211_CHAN_HT) { /* * HT channels are generated specially; we're * called to add HT20, HT40+, and HT40- chan's * so we need to expand only band specs for * the HT channel type being added. */ if ((chanFlags & IEEE80211_CHAN_HT20) && (flags & IEEE80211_CHAN_HT20) == 0) { if (verbose) printf("%u: skip, not an " "HT20 channel\n", freq); continue; } if ((chanFlags & IEEE80211_CHAN_HT40) && (flags & IEEE80211_CHAN_HT40) == 0) { if (verbose) printf("%u: skip, not an " "HT40 channel\n", freq); continue; } /* NB: HT attribute comes from caller */ flags &= ~IEEE80211_CHAN_HT; flags |= chanFlags & IEEE80211_CHAN_HT; } /* * Check if device can operate on this frequency. */ if (!checkchan(avail, freq, flags)) { if (verbose) { printf("%u: skip, ", freq); printb("flags", flags, IEEE80211_CHAN_BITS); printf(" not available\n"); } continue; } if ((flags & REQ_ECM) && !reg->ecm) { if (verbose) printf("%u: skip, ECM channel\n", freq); continue; } if ((flags & REQ_INDOOR) && reg->location == 'O') { if (verbose) printf("%u: skip, indoor channel\n", freq); continue; } if ((flags & REQ_OUTDOOR) && reg->location == 'I') { if (verbose) printf("%u: skip, outdoor channel\n", freq); continue; } if ((flags & IEEE80211_CHAN_HT40) && prev != NULL && (freq - prev->ic_freq) < channelSep) { if (verbose) printf("%u: skip, only %u channel " "separation, need %d\n", freq, freq - prev->ic_freq, channelSep); continue; } if (ci->ic_nchans == IEEE80211_CHAN_MAX) { if (verbose) printf("%u: skip, channel table full\n", freq); break; } c = &ci->ic_chans[ci->ic_nchans++]; memset(c, 0, sizeof(*c)); c->ic_freq = freq; c->ic_flags = flags; if (c->ic_flags & IEEE80211_CHAN_DFS) c->ic_maxregpower = nb->maxPowerDFS; else c->ic_maxregpower = nb->maxPower; if (verbose) { printf("[%3d] add freq %u ", ci->ic_nchans-1, c->ic_freq); printb("flags", c->ic_flags, IEEE80211_CHAN_BITS); printf(" power %u\n", c->ic_maxregpower); } /* NB: kernel fills in other fields */ prev = c; } } } static void regdomain_makechannels( struct ieee80211_regdomain_req *req, const struct ieee80211_devcaps_req *dc) { struct regdata *rdp = getregdata(); const struct country *cc; const struct ieee80211_regdomain *reg = &req->rd; struct ieee80211req_chaninfo *ci = &req->chaninfo; const struct regdomain *rd; /* * Locate construction table for new channel list. We treat * the regdomain/SKU as definitive so a country can be in * multiple with different properties (e.g. US in FCC+FCC3). * If no regdomain is specified then we fallback on the country * code to find the associated regdomain since countries always * belong to at least one regdomain. 
*/ if (reg->regdomain == 0) { cc = lib80211_country_findbycc(rdp, reg->country); if (cc == NULL) errx(1, "internal error, country %d not found", reg->country); rd = cc->rd; } else rd = lib80211_regdomain_findbysku(rdp, reg->regdomain); if (rd == NULL) errx(1, "internal error, regdomain %d not found", reg->regdomain); if (rd->sku != SKU_DEBUG) { /* * regdomain_addchans increments the channel count for * each channel it adds so initialize ic_nchans to zero. * Note that we know we have enough space to hold all possible * channels because the devcaps list size was used to * allocate our request. */ ci->ic_nchans = 0; if (!LIST_EMPTY(&rd->bands_11b)) regdomain_addchans(ci, &rd->bands_11b, reg, IEEE80211_CHAN_B, &dc->dc_chaninfo); if (!LIST_EMPTY(&rd->bands_11g)) regdomain_addchans(ci, &rd->bands_11g, reg, IEEE80211_CHAN_G, &dc->dc_chaninfo); if (!LIST_EMPTY(&rd->bands_11a)) regdomain_addchans(ci, &rd->bands_11a, reg, IEEE80211_CHAN_A, &dc->dc_chaninfo); if (!LIST_EMPTY(&rd->bands_11na) && dc->dc_htcaps != 0) { regdomain_addchans(ci, &rd->bands_11na, reg, IEEE80211_CHAN_A | IEEE80211_CHAN_HT20, &dc->dc_chaninfo); if (dc->dc_htcaps & IEEE80211_HTCAP_CHWIDTH40) { regdomain_addchans(ci, &rd->bands_11na, reg, IEEE80211_CHAN_A | IEEE80211_CHAN_HT40U, &dc->dc_chaninfo); regdomain_addchans(ci, &rd->bands_11na, reg, IEEE80211_CHAN_A | IEEE80211_CHAN_HT40D, &dc->dc_chaninfo); } } if (!LIST_EMPTY(&rd->bands_11ac) && dc->dc_vhtcaps != 0) { regdomain_addchans(ci, &rd->bands_11ac, reg, IEEE80211_CHAN_A | IEEE80211_CHAN_HT20 | IEEE80211_CHAN_VHT20, &dc->dc_chaninfo); /* VHT40 is a function of HT40.. */ if (dc->dc_htcaps & IEEE80211_HTCAP_CHWIDTH40) { regdomain_addchans(ci, &rd->bands_11ac, reg, IEEE80211_CHAN_A | IEEE80211_CHAN_HT40U | IEEE80211_CHAN_VHT40U, &dc->dc_chaninfo); regdomain_addchans(ci, &rd->bands_11ac, reg, IEEE80211_CHAN_A | IEEE80211_CHAN_HT40D | IEEE80211_CHAN_VHT40D, &dc->dc_chaninfo); } /* VHT80 */ /* XXX dc_vhtcap? */ if (1) { regdomain_addchans(ci, &rd->bands_11ac, reg, IEEE80211_CHAN_A | IEEE80211_CHAN_HT40U | IEEE80211_CHAN_VHT80, &dc->dc_chaninfo); regdomain_addchans(ci, &rd->bands_11ac, reg, IEEE80211_CHAN_A | IEEE80211_CHAN_HT40D | IEEE80211_CHAN_VHT80, &dc->dc_chaninfo); } /* XXX TODO: VHT80_80, VHT160 */ } if (!LIST_EMPTY(&rd->bands_11ng) && dc->dc_htcaps != 0) { regdomain_addchans(ci, &rd->bands_11ng, reg, IEEE80211_CHAN_G | IEEE80211_CHAN_HT20, &dc->dc_chaninfo); if (dc->dc_htcaps & IEEE80211_HTCAP_CHWIDTH40) { regdomain_addchans(ci, &rd->bands_11ng, reg, IEEE80211_CHAN_G | IEEE80211_CHAN_HT40U, &dc->dc_chaninfo); regdomain_addchans(ci, &rd->bands_11ng, reg, IEEE80211_CHAN_G | IEEE80211_CHAN_HT40D, &dc->dc_chaninfo); } } qsort(ci->ic_chans, ci->ic_nchans, sizeof(ci->ic_chans[0]), regdomain_sort); } else memcpy(ci, &dc->dc_chaninfo, IEEE80211_CHANINFO_SPACE(&dc->dc_chaninfo)); } static void list_countries(void) { struct regdata *rdp = getregdata(); const struct country *cp; const struct regdomain *dp; int i; i = 0; printf("\nCountry codes:\n"); LIST_FOREACH(cp, &rdp->countries, next) { printf("%2s %-15.15s%s", cp->isoname, cp->name, ((i+1)%4) == 0 ? "\n" : " "); i++; } i = 0; printf("\nRegulatory domains:\n"); LIST_FOREACH(dp, &rdp->domains, next) { printf("%-15.15s%s", dp->name, ((i+1)%4) == 0 ?
"\n" : " "); i++; } printf("\n"); } static void defaultcountry(const struct regdomain *rd) { struct regdata *rdp = getregdata(); const struct country *cc; cc = lib80211_country_findbycc(rdp, rd->cc->code); if (cc == NULL) errx(1, "internal error, ISO country code %d not " "defined for regdomain %s", rd->cc->code, rd->name); regdomain.country = cc->code; regdomain.isocc[0] = cc->isoname[0]; regdomain.isocc[1] = cc->isoname[1]; } static DECL_CMD_FUNC(set80211regdomain, val, d) { struct regdata *rdp = getregdata(); const struct regdomain *rd; rd = lib80211_regdomain_findbyname(rdp, val); if (rd == NULL) { char *eptr; long sku = strtol(val, &eptr, 0); if (eptr != val) rd = lib80211_regdomain_findbysku(rdp, sku); if (eptr == val || rd == NULL) errx(1, "unknown regdomain %s", val); } getregdomain(s); regdomain.regdomain = rd->sku; if (regdomain.country == 0 && rd->cc != NULL) { /* * No country code setup and there's a default * one for this regdomain fill it in. */ defaultcountry(rd); } callback_register(setregdomain_cb, ®domain); } static DECL_CMD_FUNC(set80211country, val, d) { struct regdata *rdp = getregdata(); const struct country *cc; cc = lib80211_country_findbyname(rdp, val); if (cc == NULL) { char *eptr; long code = strtol(val, &eptr, 0); if (eptr != val) cc = lib80211_country_findbycc(rdp, code); if (eptr == val || cc == NULL) errx(1, "unknown ISO country code %s", val); } getregdomain(s); regdomain.regdomain = cc->rd->sku; regdomain.country = cc->code; regdomain.isocc[0] = cc->isoname[0]; regdomain.isocc[1] = cc->isoname[1]; callback_register(setregdomain_cb, ®domain); } static void set80211location(const char *val, int d, int s, const struct afswtch *rafp) { getregdomain(s); regdomain.location = d; callback_register(setregdomain_cb, ®domain); } static void set80211ecm(const char *val, int d, int s, const struct afswtch *rafp) { getregdomain(s); regdomain.ecm = d; callback_register(setregdomain_cb, ®domain); } static void LINE_INIT(char c) { spacer = c; if (c == '\t') col = 8; else col = 1; } static void LINE_BREAK(void) { if (spacer != '\t') { printf("\n"); spacer = '\t'; } col = 8; /* 8-col tab */ } static void LINE_CHECK(const char *fmt, ...) 
{ char buf[80]; va_list ap; int n; va_start(ap, fmt); n = vsnprintf(buf+1, sizeof(buf)-1, fmt, ap); va_end(ap); col += 1+n; if (col > MAXCOL) { LINE_BREAK(); col += n; } buf[0] = spacer; printf("%s", buf); spacer = ' '; } static int getmaxrate(const uint8_t rates[15], uint8_t nrates) { int i, maxrate = -1; for (i = 0; i < nrates; i++) { int rate = rates[i] & IEEE80211_RATE_VAL; if (rate > maxrate) maxrate = rate; } return maxrate / 2; } static const char * getcaps(int capinfo) { static char capstring[32]; char *cp = capstring; if (capinfo & IEEE80211_CAPINFO_ESS) *cp++ = 'E'; if (capinfo & IEEE80211_CAPINFO_IBSS) *cp++ = 'I'; if (capinfo & IEEE80211_CAPINFO_CF_POLLABLE) *cp++ = 'c'; if (capinfo & IEEE80211_CAPINFO_CF_POLLREQ) *cp++ = 'C'; if (capinfo & IEEE80211_CAPINFO_PRIVACY) *cp++ = 'P'; if (capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE) *cp++ = 'S'; if (capinfo & IEEE80211_CAPINFO_PBCC) *cp++ = 'B'; if (capinfo & IEEE80211_CAPINFO_CHNL_AGILITY) *cp++ = 'A'; if (capinfo & IEEE80211_CAPINFO_SHORT_SLOTTIME) *cp++ = 's'; if (capinfo & IEEE80211_CAPINFO_RSN) *cp++ = 'R'; if (capinfo & IEEE80211_CAPINFO_DSSSOFDM) *cp++ = 'D'; *cp = '\0'; return capstring; } static const char * getflags(int flags) { static char flagstring[32]; char *cp = flagstring; if (flags & IEEE80211_NODE_AUTH) *cp++ = 'A'; if (flags & IEEE80211_NODE_QOS) *cp++ = 'Q'; if (flags & IEEE80211_NODE_ERP) *cp++ = 'E'; if (flags & IEEE80211_NODE_PWR_MGT) *cp++ = 'P'; if (flags & IEEE80211_NODE_HT) { *cp++ = 'H'; if (flags & IEEE80211_NODE_HTCOMPAT) *cp++ = '+'; } if (flags & IEEE80211_NODE_VHT) *cp++ = 'V'; if (flags & IEEE80211_NODE_WPS) *cp++ = 'W'; if (flags & IEEE80211_NODE_TSN) *cp++ = 'N'; if (flags & IEEE80211_NODE_AMPDU_TX) *cp++ = 'T'; if (flags & IEEE80211_NODE_AMPDU_RX) *cp++ = 'R'; if (flags & IEEE80211_NODE_MIMO_PS) { *cp++ = 'M'; if (flags & IEEE80211_NODE_MIMO_RTS) *cp++ = '+'; } if (flags & IEEE80211_NODE_RIFS) *cp++ = 'I'; if (flags & IEEE80211_NODE_SGI40) { *cp++ = 'S'; if (flags & IEEE80211_NODE_SGI20) *cp++ = '+'; } else if (flags & IEEE80211_NODE_SGI20) *cp++ = 's'; if (flags & IEEE80211_NODE_AMSDU_TX) *cp++ = 't'; if (flags & IEEE80211_NODE_AMSDU_RX) *cp++ = 'r'; *cp = '\0'; return flagstring; } static void printie(const char* tag, const uint8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { maxlen -= strlen(tag)+2; if (2*ielen > maxlen) maxlen--; printf("<"); for (; ielen > 0; ie++, ielen--) { if (maxlen-- <= 0) break; printf("%02x", *ie); } if (ielen != 0) printf("-"); printf(">"); } } #define LE_READ_2(p) \ ((u_int16_t) \ ((((const u_int8_t *)(p))[0] ) | \ (((const u_int8_t *)(p))[1] << 8))) #define LE_READ_4(p) \ ((u_int32_t) \ ((((const u_int8_t *)(p))[0] ) | \ (((const u_int8_t *)(p))[1] << 8) | \ (((const u_int8_t *)(p))[2] << 16) | \ (((const u_int8_t *)(p))[3] << 24))) /* * NB: The decoding routines assume a properly formatted ie * which should be safe as the kernel only retains them * if they parse ok. 
*/ static void printwmeparam(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { #define MS(_v, _f) (((_v) & _f) >> _f##_S) static const char *acnames[] = { "BE", "BK", "VO", "VI" }; const struct ieee80211_wme_param *wme = (const struct ieee80211_wme_param *) ie; int i; printf("%s", tag); if (!verbose) return; printf("<qosinfo 0x%x", wme->param_qosInfo); ie += offsetof(struct ieee80211_wme_param, params_acParams); for (i = 0; i < WME_NUM_AC; i++) { const struct ieee80211_wme_acparams *ac = &wme->params_acParams[i]; printf(" %s[%saifsn %u cwmin %u cwmax %u txop %u]" , acnames[i] , MS(ac->acp_aci_aifsn, WME_PARAM_ACM) ? "acm " : "" , MS(ac->acp_aci_aifsn, WME_PARAM_AIFSN) , MS(ac->acp_logcwminmax, WME_PARAM_LOGCWMIN) , MS(ac->acp_logcwminmax, WME_PARAM_LOGCWMAX) , LE_READ_2(&ac->acp_txop) ); } printf(">"); #undef MS } static void printwmeinfo(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_wme_info *wme = (const struct ieee80211_wme_info *) ie; printf("<version 0x%x info 0x%x>", wme->wme_version, wme->wme_info); } } static void printvhtcap(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_ie_vhtcap *vhtcap = (const struct ieee80211_ie_vhtcap *) ie; uint32_t vhtcap_info = LE_READ_4(&vhtcap->vht_cap_info); printf("<cap 0x%08x", vhtcap_info); printf(" rx_mcs_map 0x%x", LE_READ_2(&vhtcap->supp_mcs.rx_mcs_map)); printf(" rx_highest %d", LE_READ_2(&vhtcap->supp_mcs.rx_highest) & 0x1fff); printf(" tx_mcs_map 0x%x", LE_READ_2(&vhtcap->supp_mcs.tx_mcs_map)); printf(" tx_highest %d", LE_READ_2(&vhtcap->supp_mcs.tx_highest) & 0x1fff); printf(">"); } } static void printvhtinfo(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_ie_vht_operation *vhtinfo = (const struct ieee80211_ie_vht_operation *) ie; printf("<chan_width %d center_freq_seg1_idx %d center_freq_seg2_idx %d basic_mcs_set 0x%04x>", vhtinfo->chan_width, vhtinfo->center_freq_seg1_idx, vhtinfo->center_freq_seg2_idx, LE_READ_2(&vhtinfo->basic_mcs_set)); } } static void printvhtpwrenv(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); static const char *txpwrmap[] = { "20", "40", "80", "160", }; if (verbose) { const struct ieee80211_ie_vht_txpwrenv *vhtpwr = (const struct ieee80211_ie_vht_txpwrenv *) ie; int i, n; const char *sep = ""; /* Get count; trim at ielen */ n = (vhtpwr->tx_info & IEEE80211_VHT_TXPWRENV_INFO_COUNT_MASK) + 1; /* Trim at ielen */ if (n > ielen - 3) n = ielen - 3; printf("<tx_info 0x%02x pwr:[", vhtpwr->tx_info); for (i = 0; i < n; i++) { printf("%s%s:%.2f", sep, txpwrmap[i], ((float) ((int8_t) ie[i+3])) / 2.0); sep = " "; } printf("]>"); } } static void printhtcap(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_ie_htcap *htcap = (const struct ieee80211_ie_htcap *) ie; const char *sep; int i, j; printf("<cap 0x%x param 0x%x", LE_READ_2(&htcap->hc_cap), htcap->hc_param); printf(" mcsset["); sep = ""; for (i = 0; i < IEEE80211_HTRATE_MAXSIZE; i++) if (isset(htcap->hc_mcsset, i)) { for (j = i+1; j < IEEE80211_HTRATE_MAXSIZE; j++) if (isclr(htcap->hc_mcsset, j)) break; j--; if (i == j) printf("%s%u", sep, i); else printf("%s%u-%u", sep, i, j); i += j-i; sep = ","; } printf("] extcap 0x%x txbf 0x%x antenna 0x%x>", LE_READ_2(&htcap->hc_extcap), LE_READ_4(&htcap->hc_txbf), htcap->hc_antenna); } } static void printhtinfo(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_ie_htinfo *htinfo = (const struct ieee80211_ie_htinfo *) ie; const char *sep; int i, j; printf("<ctrl_ch %u, %x,%x,%x,%x", htinfo->hi_ctrlchannel, htinfo->hi_byte1,
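/*
 * The LE_READ_2()/LE_READ_4() macros used throughout these IE decoders
 * assemble little-endian wire fields byte-by-byte, so the decode behaves
 * identically on little- and big-endian hosts. A self-contained check of
 * what they evaluate to; the buffer contents are invented for the example.
 */
#include <assert.h>
#include <sys/types.h>

static void
le_read_example(void)
{
	const u_int8_t buf[4] = { 0x11, 0x22, 0x33, 0x44 };

	assert(LE_READ_2(buf) == 0x2211);	/* low byte first */
	assert(LE_READ_4(buf) == 0x44332211);
}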
htinfo->hi_byte2, htinfo->hi_byte3, LE_READ_2(&htinfo->hi_byte45)); printf(" basicmcs["); sep = ""; for (i = 0; i < IEEE80211_HTRATE_MAXSIZE; i++) if (isset(htinfo->hi_basicmcsset, i)) { for (j = i+1; j < IEEE80211_HTRATE_MAXSIZE; j++) if (isclr(htinfo->hi_basicmcsset, j)) break; j--; if (i == j) printf("%s%u", sep, i); else printf("%s%u-%u", sep, i, j); i += j-i; sep = ","; } printf("]>"); } } static void printathie(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_ath_ie *ath = (const struct ieee80211_ath_ie *)ie; printf("<"); if (ath->ath_capability & ATHEROS_CAP_TURBO_PRIME) printf("DTURBO,"); if (ath->ath_capability & ATHEROS_CAP_COMPRESSION) printf("COMP,"); if (ath->ath_capability & ATHEROS_CAP_FAST_FRAME) printf("FF,"); if (ath->ath_capability & ATHEROS_CAP_XR) printf("XR,"); if (ath->ath_capability & ATHEROS_CAP_AR) printf("AR,"); if (ath->ath_capability & ATHEROS_CAP_BURST) printf("BURST,"); if (ath->ath_capability & ATHEROS_CAP_WME) printf("WME,"); if (ath->ath_capability & ATHEROS_CAP_BOOST) printf("BOOST,"); printf("0x%x>", LE_READ_2(ath->ath_defkeyix)); } } static void printmeshconf(const char *tag, const uint8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_meshconf_ie *mconf = (const struct ieee80211_meshconf_ie *)ie; printf("<PATH:"); if (mconf->conf_pselid == IEEE80211_MESHCONF_PATH_HWMP) printf("HWMP"); else printf("UNKNOWN"); printf(" LINK:"); if (mconf->conf_pmetid == IEEE80211_MESHCONF_METRIC_AIRTIME) printf("AIRTIME"); else printf("UNKNOWN"); printf(" CONGESTION:"); if (mconf->conf_ccid == IEEE80211_MESHCONF_CC_DISABLED) printf("DISABLED"); else printf("UNKNOWN"); printf(" SYNC:"); if (mconf->conf_syncid == IEEE80211_MESHCONF_SYNC_NEIGHOFF) printf("NEIGHOFF"); else printf("UNKNOWN"); printf(" AUTH:"); if (mconf->conf_authid == IEEE80211_MESHCONF_AUTH_DISABLED) printf("DISABLED"); else printf("UNKNOWN"); printf(" FORM:0x%x CAPS:0x%x>", mconf->conf_form, mconf->conf_cap); } } static void printbssload(const char *tag, const uint8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_bss_load_ie *bssload = (const struct ieee80211_bss_load_ie *) ie; printf("<sta count %d, chan load %d, aac %d>", LE_READ_2(&bssload->sta_count), bssload->chan_load, bssload->aac); } } static void printapchanrep(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const struct ieee80211_ap_chan_report_ie *ap = (const struct ieee80211_ap_chan_report_ie *) ie; const char *sep = ""; int i; printf("<class %u, chan:[", ap->i_class); for (i = 3; i < ielen; i++) { printf("%s%u", sep, ie[i]); sep = ","; } printf("]>"); } } static const char * wpa_cipher(const u_int8_t *sel) { #define WPA_SEL(x) (((x)<<24)|WPA_OUI) u_int32_t w = LE_READ_4(sel); switch (w) { case WPA_SEL(WPA_CSE_NULL): return "NONE"; case WPA_SEL(WPA_CSE_WEP40): return "WEP40"; case WPA_SEL(WPA_CSE_WEP104): return "WEP104"; case WPA_SEL(WPA_CSE_TKIP): return "TKIP"; case WPA_SEL(WPA_CSE_CCMP): return "AES-CCMP"; } return "?"; /* NB: so 1<< is discarded */ #undef WPA_SEL } static const char * wpa_keymgmt(const u_int8_t *sel) { #define WPA_SEL(x) (((x)<<24)|WPA_OUI) u_int32_t w = LE_READ_4(sel); switch (w) { case WPA_SEL(WPA_ASE_8021X_UNSPEC): return "8021X-UNSPEC"; case WPA_SEL(WPA_ASE_8021X_PSK): return "8021X-PSK"; case WPA_SEL(WPA_ASE_NONE): return "NONE"; } return "?"; #undef WPA_SEL } static void printwpaie(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { u_int8_t len = ie[1]; printf("%s", tag); if
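/*
 * wpa_cipher() and wpa_keymgmt() above match a 4-byte OUI+type suite
 * selector by packing it into one 32-bit word: LE_READ_4() leaves the three
 * OUI bytes in the low 24 bits and the selector type in the high byte, which
 * is exactly the layout WPA_SEL() builds. A worked check of that layout,
 * assuming WPA_OUI is 0xf25000 and WPA_CSE_TKIP is 2 as in net80211.
 */
#include <assert.h>
#include <sys/types.h>

static void
wpa_sel_example(void)
{
	/* TKIP suite selector as it appears on the wire: OUI 00:50:f2, type 2 */
	const u_int8_t sel[4] = { 0x00, 0x50, 0xf2, 0x02 };

	assert(LE_READ_4(sel) == ((2 << 24) | 0xf25000));
}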
(verbose) { const char *sep; int n; ie += 6, len -= 4; /* NB: len is payload only */ printf("<v%u", LE_READ_2(ie)); ie += 2, len -= 2; printf(" mc:%s", wpa_cipher(ie)); ie += 4, len -= 4; /* unicast ciphers */ n = LE_READ_2(ie); ie += 2, len -= 2; sep = " uc:"; for (; n > 0; n--) { printf("%s%s", sep, wpa_cipher(ie)); ie += 4, len -= 4; sep = "+"; } /* key management algorithms */ n = LE_READ_2(ie); ie += 2, len -= 2; sep = " km:"; for (; n > 0; n--) { printf("%s%s", sep, wpa_keymgmt(ie)); ie += 4, len -= 4; sep = "+"; } if (len > 2) /* optional capabilities */ printf(", caps 0x%x", LE_READ_2(ie)); printf(">"); } } static const char * rsn_cipher(const u_int8_t *sel) { #define RSN_SEL(x) (((x)<<24)|RSN_OUI) u_int32_t w = LE_READ_4(sel); switch (w) { case RSN_SEL(RSN_CSE_NULL): return "NONE"; case RSN_SEL(RSN_CSE_WEP40): return "WEP40"; case RSN_SEL(RSN_CSE_WEP104): return "WEP104"; case RSN_SEL(RSN_CSE_TKIP): return "TKIP"; case RSN_SEL(RSN_CSE_CCMP): return "AES-CCMP"; case RSN_SEL(RSN_CSE_WRAP): return "AES-OCB"; } return "?"; #undef RSN_SEL } static const char * rsn_keymgmt(const u_int8_t *sel) { #define RSN_SEL(x) (((x)<<24)|RSN_OUI) u_int32_t w = LE_READ_4(sel); switch (w) { case RSN_SEL(RSN_ASE_8021X_UNSPEC): return "8021X-UNSPEC"; case RSN_SEL(RSN_ASE_8021X_PSK): return "8021X-PSK"; case RSN_SEL(RSN_ASE_NONE): return "NONE"; } return "?"; #undef RSN_SEL } static void printrsnie(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose) { const char *sep; int n; ie += 2, ielen -= 2; printf("<v%u", LE_READ_2(ie)); ie += 2, ielen -= 2; printf(" mc:%s", rsn_cipher(ie)); ie += 4, ielen -= 4; /* unicast ciphers */ n = LE_READ_2(ie); ie += 2, ielen -= 2; sep = " uc:"; for (; n > 0; n--) { printf("%s%s", sep, rsn_cipher(ie)); ie += 4, ielen -= 4; sep = "+"; } /* key management algorithms */ n = LE_READ_2(ie); ie += 2, ielen -= 2; sep = " km:"; for (; n > 0; n--) { printf("%s%s", sep, rsn_keymgmt(ie)); ie += 4, ielen -= 4; sep = "+"; } if (ielen > 2) /* optional capabilities */ printf(", caps 0x%x", LE_READ_2(ie)); /* XXXPMKID */ printf(">"); } } /* XXX move to a public include file */ #define IEEE80211_WPS_DEV_PASS_ID 0x1012 #define IEEE80211_WPS_SELECTED_REG 0x1041 #define IEEE80211_WPS_SETUP_STATE 0x1044 #define IEEE80211_WPS_UUID_E 0x1047 #define IEEE80211_WPS_VERSION 0x104a #define BE_READ_2(p) \ ((u_int16_t) \ ((((const u_int8_t *)(p))[1] ) | \ (((const u_int8_t *)(p))[0] << 8))) static void printwpsie(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { u_int8_t len = ie[1]; printf("%s", tag); if (verbose) { static const char *dev_pass_id[] = { "D", /* Default (PIN) */ "U", /* User-specified */ "M", /* Machine-specified */ "K", /* Rekey */ "P", /* PushButton */ "R" /* Registrar-specified */ }; int n; ie += 6, len -= 4; /* NB: len is payload only */ /* WPS IE in Beacon and Probe Resp frames have different fields */ printf("<"); while (len) { uint16_t tlv_type = BE_READ_2(ie); uint16_t tlv_len = BE_READ_2(ie + 2); /* some devices broadcast invalid WPS frames */ if (tlv_len > len) { printf("bad frame length tlv_type=0x%02x " "tlv_len=%d len=%d", tlv_type, tlv_len, len); break; } ie += 4, len -= 4; switch (tlv_type) { case IEEE80211_WPS_VERSION: printf("v:%d.%d", *ie >> 4, *ie & 0xf); break; case IEEE80211_WPS_SETUP_STATE: /* Only 1 and 2 are valid */ if (*ie == 0 || *ie >= 3) printf(" state:B"); else printf(" st:%s", *ie == 1 ? "N" : "C"); break; case IEEE80211_WPS_SELECTED_REG: printf(" sel:%s", *ie ?
"T" : "F"); break; case IEEE80211_WPS_DEV_PASS_ID: n = LE_READ_2(ie); if (n < nitems(dev_pass_id)) printf(" dpi:%s", dev_pass_id[n]); break; case IEEE80211_WPS_UUID_E: printf(" uuid-e:"); for (n = 0; n < (tlv_len - 1); n++) printf("%02x-", ie[n]); printf("%02x", ie[n]); break; } ie += tlv_len, len -= tlv_len; } printf(">"); } } static void printtdmaie(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { printf("%s", tag); if (verbose && ielen >= sizeof(struct ieee80211_tdma_param)) { const struct ieee80211_tdma_param *tdma = (const struct ieee80211_tdma_param *) ie; /* XXX tstamp */ printf("", tdma->tdma_version, tdma->tdma_slot, tdma->tdma_slotcnt, LE_READ_2(&tdma->tdma_slotlen), tdma->tdma_bintval, tdma->tdma_inuse[0]); } } /* * Copy the ssid string contents into buf, truncating to fit. If the * ssid is entirely printable then just copy intact. Otherwise convert * to hexadecimal. If the result is truncated then replace the last * three characters with "...". */ static int copy_essid(char buf[], size_t bufsize, const u_int8_t *essid, size_t essid_len) { const u_int8_t *p; size_t maxlen; u_int i; if (essid_len > bufsize) maxlen = bufsize; else maxlen = essid_len; /* determine printable or not */ for (i = 0, p = essid; i < maxlen; i++, p++) { if (*p < ' ' || *p > 0x7e) break; } if (i != maxlen) { /* not printable, print as hex */ if (bufsize < 3) return 0; strlcpy(buf, "0x", bufsize); bufsize -= 2; p = essid; for (i = 0; i < maxlen && bufsize >= 2; i++) { sprintf(&buf[2+2*i], "%02x", p[i]); bufsize -= 2; } if (i != essid_len) memcpy(&buf[2+2*i-3], "...", 3); } else { /* printable, truncate as needed */ memcpy(buf, essid, maxlen); if (maxlen != essid_len) memcpy(&buf[maxlen-3], "...", 3); } return maxlen; } static void printssid(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { char ssid[2*IEEE80211_NWID_LEN+1]; printf("%s<%.*s>", tag, copy_essid(ssid, maxlen, ie+2, ie[1]), ssid); } static void printrates(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { const char *sep; int i; printf("%s", tag); sep = "<"; for (i = 2; i < ielen; i++) { printf("%s%s%d", sep, ie[i] & IEEE80211_RATE_BASIC ? 
"B" : "", ie[i] & IEEE80211_RATE_VAL); sep = ","; } printf(">"); } static void printcountry(const char *tag, const u_int8_t *ie, size_t ielen, int maxlen) { const struct ieee80211_country_ie *cie = (const struct ieee80211_country_ie *) ie; int i, nbands, schan, nchan; printf("%s<%c%c%c", tag, cie->cc[0], cie->cc[1], cie->cc[2]); nbands = (cie->len - 3) / sizeof(cie->band[0]); for (i = 0; i < nbands; i++) { schan = cie->band[i].schan; nchan = cie->band[i].nchan; if (nchan != 1) printf(" %u-%u,%u", schan, schan + nchan-1, cie->band[i].maxtxpwr); else printf(" %u,%u", schan, cie->band[i].maxtxpwr); } printf(">"); } static __inline int iswpaoui(const u_int8_t *frm) { return frm[1] > 3 && LE_READ_4(frm+2) == ((WPA_OUI_TYPE<<24)|WPA_OUI); } static __inline int iswmeinfo(const u_int8_t *frm) { return frm[1] > 5 && LE_READ_4(frm+2) == ((WME_OUI_TYPE<<24)|WME_OUI) && frm[6] == WME_INFO_OUI_SUBTYPE; } static __inline int iswmeparam(const u_int8_t *frm) { return frm[1] > 5 && LE_READ_4(frm+2) == ((WME_OUI_TYPE<<24)|WME_OUI) && frm[6] == WME_PARAM_OUI_SUBTYPE; } static __inline int isatherosoui(const u_int8_t *frm) { return frm[1] > 3 && LE_READ_4(frm+2) == ((ATH_OUI_TYPE<<24)|ATH_OUI); } static __inline int istdmaoui(const uint8_t *frm) { return frm[1] > 3 && LE_READ_4(frm+2) == ((TDMA_OUI_TYPE<<24)|TDMA_OUI); } static __inline int iswpsoui(const uint8_t *frm) { return frm[1] > 3 && LE_READ_4(frm+2) == ((WPS_OUI_TYPE<<24)|WPA_OUI); } static const char * iename(int elemid) { switch (elemid) { case IEEE80211_ELEMID_FHPARMS: return " FHPARMS"; case IEEE80211_ELEMID_CFPARMS: return " CFPARMS"; case IEEE80211_ELEMID_TIM: return " TIM"; case IEEE80211_ELEMID_IBSSPARMS:return " IBSSPARMS"; case IEEE80211_ELEMID_BSSLOAD: return " BSSLOAD"; case IEEE80211_ELEMID_CHALLENGE:return " CHALLENGE"; case IEEE80211_ELEMID_PWRCNSTR: return " PWRCNSTR"; case IEEE80211_ELEMID_PWRCAP: return " PWRCAP"; case IEEE80211_ELEMID_TPCREQ: return " TPCREQ"; case IEEE80211_ELEMID_TPCREP: return " TPCREP"; case IEEE80211_ELEMID_SUPPCHAN: return " SUPPCHAN"; case IEEE80211_ELEMID_CSA: return " CSA"; case IEEE80211_ELEMID_MEASREQ: return " MEASREQ"; case IEEE80211_ELEMID_MEASREP: return " MEASREP"; case IEEE80211_ELEMID_QUIET: return " QUIET"; case IEEE80211_ELEMID_IBSSDFS: return " IBSSDFS"; case IEEE80211_ELEMID_TPC: return " TPC"; case IEEE80211_ELEMID_CCKM: return " CCKM"; } return " ???"; } static void printies(const u_int8_t *vp, int ielen, int maxcols) { while (ielen > 0) { switch (vp[0]) { case IEEE80211_ELEMID_SSID: if (verbose) printssid(" SSID", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_RATES: case IEEE80211_ELEMID_XRATES: if (verbose) printrates(vp[0] == IEEE80211_ELEMID_RATES ? 
" RATES" : " XRATES", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_DSPARMS: if (verbose) printf(" DSPARMS<%u>", vp[2]); break; case IEEE80211_ELEMID_COUNTRY: if (verbose) printcountry(" COUNTRY", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_ERP: if (verbose) printf(" ERP<0x%x>", vp[2]); break; case IEEE80211_ELEMID_VENDOR: if (iswpaoui(vp)) printwpaie(" WPA", vp, 2+vp[1], maxcols); else if (iswmeinfo(vp)) printwmeinfo(" WME", vp, 2+vp[1], maxcols); else if (iswmeparam(vp)) printwmeparam(" WME", vp, 2+vp[1], maxcols); else if (isatherosoui(vp)) printathie(" ATH", vp, 2+vp[1], maxcols); else if (iswpsoui(vp)) printwpsie(" WPS", vp, 2+vp[1], maxcols); else if (istdmaoui(vp)) printtdmaie(" TDMA", vp, 2+vp[1], maxcols); else if (verbose) printie(" VEN", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_RSN: printrsnie(" RSN", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_HTCAP: printhtcap(" HTCAP", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_HTINFO: if (verbose) printhtinfo(" HTINFO", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_MESHID: if (verbose) printssid(" MESHID", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_MESHCONF: printmeshconf(" MESHCONF", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_VHT_CAP: printvhtcap(" VHTCAP", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_VHT_OPMODE: printvhtinfo(" VHTOPMODE", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_VHT_PWR_ENV: printvhtpwrenv(" VHTPWRENV", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_BSSLOAD: printbssload(" BSSLOAD", vp, 2+vp[1], maxcols); break; case IEEE80211_ELEMID_APCHANREP: printapchanrep(" APCHANREP", vp, 2+vp[1], maxcols); break; default: if (verbose) printie(iename(vp[0]), vp, 2+vp[1], maxcols); break; } ielen -= 2+vp[1]; vp += 2+vp[1]; } } static void printmimo(const struct ieee80211_mimo_info *mi) { int i; int r = 0; for (i = 0; i < IEEE80211_MAX_CHAINS; i++) { if (mi->ch[i].rssi != 0) { r = 1; break; } } /* NB: don't muddy display unless there's something to show */ if (r == 0) return; /* XXX TODO: ignore EVM; secondary channels for now */ printf(" (rssi %.1f:%.1f:%.1f:%.1f nf %d:%d:%d:%d)", mi->ch[0].rssi[0] / 2.0, mi->ch[1].rssi[0] / 2.0, mi->ch[2].rssi[0] / 2.0, mi->ch[3].rssi[0] / 2.0, mi->ch[0].noise[0], mi->ch[1].noise[0], mi->ch[2].noise[0], mi->ch[3].noise[0]); } static void list_scan(int s) { uint8_t buf[24*1024]; char ssid[IEEE80211_NWID_LEN+1]; const uint8_t *cp; - int len, ssidmax, idlen; + int len, idlen; if (get80211len(s, IEEE80211_IOC_SCAN_RESULTS, buf, sizeof(buf), &len) < 0) errx(1, "unable to get scan results"); if (len < sizeof(struct ieee80211req_scan_result)) return; getchaninfo(s); - ssidmax = verbose ? 
IEEE80211_NWID_LEN : 32; printf("%-*.*s %-17.17s %4s %4s %-7s %3s %4s\n" - , ssidmax, ssidmax, "SSID/MESH ID" + , IEEE80211_NWID_LEN, IEEE80211_NWID_LEN, "SSID/MESH ID" , "BSSID" , "CHAN" , "RATE" , " S:N" , "INT" , "CAPS" ); cp = buf; do { const struct ieee80211req_scan_result *sr; const uint8_t *vp, *idp; sr = (const struct ieee80211req_scan_result *) cp; vp = cp + sr->isr_ie_off; if (sr->isr_meshid_len) { idp = vp + sr->isr_ssid_len; idlen = sr->isr_meshid_len; } else { idp = vp; idlen = sr->isr_ssid_len; } printf("%-*.*s %s %3d %3dM %4d:%-4d %4d %-4.4s" - , ssidmax - , copy_essid(ssid, ssidmax, idp, idlen) + , IEEE80211_NWID_LEN + , copy_essid(ssid, IEEE80211_NWID_LEN, idp, idlen) , ssid , ether_ntoa((const struct ether_addr *) sr->isr_bssid) , ieee80211_mhz2ieee(sr->isr_freq, sr->isr_flags) , getmaxrate(sr->isr_rates, sr->isr_nrates) , (sr->isr_rssi/2)+sr->isr_noise, sr->isr_noise , sr->isr_intval , getcaps(sr->isr_capinfo) ); printies(vp + sr->isr_ssid_len + sr->isr_meshid_len, sr->isr_ie_len, 24); printf("\n"); cp += sr->isr_len, len -= sr->isr_len; } while (len >= sizeof(struct ieee80211req_scan_result)); } static void scan_and_wait(int s) { struct ieee80211_scan_req sr; struct ieee80211req ireq; int sroute; sroute = socket(PF_ROUTE, SOCK_RAW, 0); if (sroute < 0) { perror("socket(PF_ROUTE,SOCK_RAW)"); return; } (void) memset(&ireq, 0, sizeof(ireq)); (void) strlcpy(ireq.i_name, name, sizeof(ireq.i_name)); ireq.i_type = IEEE80211_IOC_SCAN_REQ; memset(&sr, 0, sizeof(sr)); sr.sr_flags = IEEE80211_IOC_SCAN_ACTIVE | IEEE80211_IOC_SCAN_BGSCAN | IEEE80211_IOC_SCAN_NOPICK | IEEE80211_IOC_SCAN_ONCE; sr.sr_duration = IEEE80211_IOC_SCAN_FOREVER; sr.sr_nssid = 0; ireq.i_data = &sr; ireq.i_len = sizeof(sr); /* * NB: only root can trigger a scan so ignore errors. Also ignore * possible errors from net80211, even if no new scan could be * started there might still be a valid scan cache. */ if (ioctl(s, SIOCS80211, &ireq) == 0) { char buf[2048]; struct if_announcemsghdr *ifan; struct rt_msghdr *rtm; do { if (read(sroute, buf, sizeof(buf)) < 0) { perror("read(PF_ROUTE)"); break; } rtm = (struct rt_msghdr *) buf; if (rtm->rtm_version != RTM_VERSION) break; ifan = (struct if_announcemsghdr *) rtm; } while (rtm->rtm_type != RTM_IEEE80211 || ifan->ifan_what != RTM_IEEE80211_SCAN); } close(sroute); } static DECL_CMD_FUNC(set80211scan, val, d) { scan_and_wait(s); list_scan(s); } static enum ieee80211_opmode get80211opmode(int s); static int gettxseq(const struct ieee80211req_sta_info *si) { int i, txseq; if ((si->isi_state & IEEE80211_NODE_QOS) == 0) return si->isi_txseqs[0]; /* XXX not right but usually what folks want */ txseq = 0; for (i = 0; i < IEEE80211_TID_SIZE; i++) if (si->isi_txseqs[i] > txseq) txseq = si->isi_txseqs[i]; return txseq; } static int getrxseq(const struct ieee80211req_sta_info *si) { int i, rxseq; if ((si->isi_state & IEEE80211_NODE_QOS) == 0) return si->isi_rxseqs[0]; /* XXX not right but usually what folks want */ rxseq = 0; for (i = 0; i < IEEE80211_TID_SIZE; i++) if (si->isi_rxseqs[i] > rxseq) rxseq = si->isi_rxseqs[i]; return rxseq; } static void list_stations(int s) { union { struct ieee80211req_sta_req req; uint8_t buf[24*1024]; } u; enum ieee80211_opmode opmode = get80211opmode(s); const uint8_t *cp; int len; /* broadcast address =>'s get all stations */ (void) memset(u.req.is_u.macaddr, 0xff, IEEE80211_ADDR_LEN); if (opmode == IEEE80211_M_STA) { /* * Get information about the associated AP.
*/ (void) get80211(s, IEEE80211_IOC_BSSID, u.req.is_u.macaddr, IEEE80211_ADDR_LEN); } if (get80211len(s, IEEE80211_IOC_STA_INFO, &u, sizeof(u), &len) < 0) errx(1, "unable to get station information"); if (len < sizeof(struct ieee80211req_sta_info)) return; getchaninfo(s); if (opmode == IEEE80211_M_MBSS) printf("%-17.17s %4s %5s %5s %7s %4s %4s %4s %6s %6s\n" , "ADDR" , "CHAN" , "LOCAL" , "PEER" , "STATE" , "RATE" , "RSSI" , "IDLE" , "TXSEQ" , "RXSEQ" ); else printf("%-17.17s %4s %4s %4s %4s %4s %6s %6s %4s %-7s\n" , "ADDR" , "AID" , "CHAN" , "RATE" , "RSSI" , "IDLE" , "TXSEQ" , "RXSEQ" , "CAPS" , "FLAG" ); cp = (const uint8_t *) u.req.info; do { const struct ieee80211req_sta_info *si; si = (const struct ieee80211req_sta_info *) cp; if (si->isi_len < sizeof(*si)) break; if (opmode == IEEE80211_M_MBSS) printf("%s %4d %5x %5x %7.7s %3dM %4.1f %4d %6d %6d" , ether_ntoa((const struct ether_addr*) si->isi_macaddr) , ieee80211_mhz2ieee(si->isi_freq, si->isi_flags) , si->isi_localid , si->isi_peerid , mesh_linkstate_string(si->isi_peerstate) , si->isi_txmbps/2 , si->isi_rssi/2. , si->isi_inact , gettxseq(si) , getrxseq(si) ); else printf("%s %4u %4d %3dM %4.1f %4d %6d %6d %-4.4s %-7.7s" , ether_ntoa((const struct ether_addr*) si->isi_macaddr) , IEEE80211_AID(si->isi_associd) , ieee80211_mhz2ieee(si->isi_freq, si->isi_flags) , si->isi_txmbps/2 , si->isi_rssi/2. , si->isi_inact , gettxseq(si) , getrxseq(si) , getcaps(si->isi_capinfo) , getflags(si->isi_state) ); printies(cp + si->isi_ie_off, si->isi_ie_len, 24); printmimo(&si->isi_mimo); printf("\n"); cp += si->isi_len, len -= si->isi_len; } while (len >= sizeof(struct ieee80211req_sta_info)); } static const char * mesh_linkstate_string(uint8_t state) { static const char *state_names[] = { [0] = "IDLE", [1] = "OPEN-TX", [2] = "OPEN-RX", [3] = "CONF-RX", [4] = "ESTAB", [5] = "HOLDING", }; if (state >= nitems(state_names)) { static char buf[10]; snprintf(buf, sizeof(buf), "#%u", state); return buf; } else return state_names[state]; } static const char * get_chaninfo(const struct ieee80211_channel *c, int precise, char buf[], size_t bsize) { buf[0] = '\0'; if (IEEE80211_IS_CHAN_FHSS(c)) strlcat(buf, " FHSS", bsize); if (IEEE80211_IS_CHAN_A(c)) strlcat(buf, " 11a", bsize); else if (IEEE80211_IS_CHAN_ANYG(c)) strlcat(buf, " 11g", bsize); else if (IEEE80211_IS_CHAN_B(c)) strlcat(buf, " 11b", bsize); if (IEEE80211_IS_CHAN_HALF(c)) strlcat(buf, "/10MHz", bsize); if (IEEE80211_IS_CHAN_QUARTER(c)) strlcat(buf, "/5MHz", bsize); if (IEEE80211_IS_CHAN_TURBO(c)) strlcat(buf, " Turbo", bsize); if (precise) { /* XXX should make VHT80U, VHT80D */ if (IEEE80211_IS_CHAN_VHT80(c) && IEEE80211_IS_CHAN_HT40D(c)) strlcat(buf, " vht/80-", bsize); else if (IEEE80211_IS_CHAN_VHT80(c) && IEEE80211_IS_CHAN_HT40U(c)) strlcat(buf, " vht/80+", bsize); else if (IEEE80211_IS_CHAN_VHT80(c)) strlcat(buf, " vht/80", bsize); else if (IEEE80211_IS_CHAN_VHT40D(c)) strlcat(buf, " vht/40-", bsize); else if (IEEE80211_IS_CHAN_VHT40U(c)) strlcat(buf, " vht/40+", bsize); else if (IEEE80211_IS_CHAN_VHT20(c)) strlcat(buf, " vht/20", bsize); else if (IEEE80211_IS_CHAN_HT20(c)) strlcat(buf, " ht/20", bsize); else if (IEEE80211_IS_CHAN_HT40D(c)) strlcat(buf, " ht/40-", bsize); else if (IEEE80211_IS_CHAN_HT40U(c)) strlcat(buf, " ht/40+", bsize); } else { if (IEEE80211_IS_CHAN_VHT(c)) strlcat(buf, " vht", bsize); else if (IEEE80211_IS_CHAN_HT(c)) strlcat(buf, " ht", bsize); } return buf; } static void print_chaninfo(const struct ieee80211_channel *c, int verb) { char buf[14]; if (verb) 
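/*
 * mesh_linkstate_string() above pairs a designated-initializer name table
 * with a "#N" fallback rendered into a static buffer, which is safe in
 * single-threaded ifconfig because the result is consumed before the next
 * call. The same pattern in reduced form; opmode_string() and its table
 * contents are illustrative, not part of this file.
 */
#include <stdio.h>

static const char *
opmode_string(unsigned int mode)
{
	static const char *names[] = { [0] = "STA", [1] = "IBSS", [2] = "AP" };
	static char buf[12];

	if (mode < nitems(names) && names[mode] != NULL)
		return names[mode];
	snprintf(buf, sizeof(buf), "#%u", mode);
	return buf;
}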
printf("Channel %3u : %u%c%c%c%c%c MHz%-14.14s", ieee80211_mhz2ieee(c->ic_freq, c->ic_flags), c->ic_freq, IEEE80211_IS_CHAN_PASSIVE(c) ? '*' : ' ', IEEE80211_IS_CHAN_DFS(c) ? 'D' : ' ', IEEE80211_IS_CHAN_RADAR(c) ? 'R' : ' ', IEEE80211_IS_CHAN_CWINT(c) ? 'I' : ' ', IEEE80211_IS_CHAN_CACDONE(c) ? 'C' : ' ', get_chaninfo(c, verb, buf, sizeof(buf))); else printf("Channel %3u : %u%c MHz%-14.14s", ieee80211_mhz2ieee(c->ic_freq, c->ic_flags), c->ic_freq, IEEE80211_IS_CHAN_PASSIVE(c) ? '*' : ' ', get_chaninfo(c, verb, buf, sizeof(buf))); } static int chanpref(const struct ieee80211_channel *c) { if (IEEE80211_IS_CHAN_VHT160(c)) return 80; if (IEEE80211_IS_CHAN_VHT80_80(c)) return 75; if (IEEE80211_IS_CHAN_VHT80(c)) return 70; if (IEEE80211_IS_CHAN_VHT40(c)) return 60; if (IEEE80211_IS_CHAN_VHT20(c)) return 50; if (IEEE80211_IS_CHAN_HT40(c)) return 40; if (IEEE80211_IS_CHAN_HT20(c)) return 30; if (IEEE80211_IS_CHAN_HALF(c)) return 10; if (IEEE80211_IS_CHAN_QUARTER(c)) return 5; if (IEEE80211_IS_CHAN_TURBO(c)) return 25; if (IEEE80211_IS_CHAN_A(c)) return 20; if (IEEE80211_IS_CHAN_G(c)) return 20; if (IEEE80211_IS_CHAN_B(c)) return 15; if (IEEE80211_IS_CHAN_PUREG(c)) return 15; return 0; } static void print_channels(int s, const struct ieee80211req_chaninfo *chans, int allchans, int verb) { struct ieee80211req_chaninfo *achans; uint8_t reported[IEEE80211_CHAN_BYTES]; const struct ieee80211_channel *c; int i, half; achans = malloc(IEEE80211_CHANINFO_SPACE(chans)); if (achans == NULL) errx(1, "no space for active channel list"); achans->ic_nchans = 0; memset(reported, 0, sizeof(reported)); if (!allchans) { struct ieee80211req_chanlist active; if (get80211(s, IEEE80211_IOC_CHANLIST, &active, sizeof(active)) < 0) errx(1, "unable to get active channel list"); for (i = 0; i < chans->ic_nchans; i++) { c = &chans->ic_chans[i]; if (!isset(active.ic_channels, c->ic_ieee)) continue; /* * Suppress compatible duplicates unless * verbose. The kernel gives us it's * complete channel list which has separate * entries for 11g/11b and 11a/turbo. 
*/ if (isset(reported, c->ic_ieee) && !verb) { /* XXX we assume duplicates are adjacent */ achans->ic_chans[achans->ic_nchans-1] = *c; } else { achans->ic_chans[achans->ic_nchans++] = *c; setbit(reported, c->ic_ieee); } } } else { for (i = 0; i < chans->ic_nchans; i++) { c = &chans->ic_chans[i]; /* suppress duplicates as above */ if (isset(reported, c->ic_ieee) && !verb) { /* XXX we assume duplicates are adjacent */ struct ieee80211_channel *a = &achans->ic_chans[achans->ic_nchans-1]; if (chanpref(c) > chanpref(a)) *a = *c; } else { achans->ic_chans[achans->ic_nchans++] = *c; setbit(reported, c->ic_ieee); } } } half = achans->ic_nchans / 2; if (achans->ic_nchans % 2) half++; for (i = 0; i < achans->ic_nchans / 2; i++) { print_chaninfo(&achans->ic_chans[i], verb); print_chaninfo(&achans->ic_chans[half+i], verb); printf("\n"); } if (achans->ic_nchans % 2) { print_chaninfo(&achans->ic_chans[i], verb); printf("\n"); } free(achans); } static void list_channels(int s, int allchans) { getchaninfo(s); print_channels(s, chaninfo, allchans, verbose); } static void print_txpow(const struct ieee80211_channel *c) { printf("Channel %3u : %u MHz %3.1f reg %2d ", c->ic_ieee, c->ic_freq, c->ic_maxpower/2., c->ic_maxregpower); } static void print_txpow_verbose(const struct ieee80211_channel *c) { print_chaninfo(c, 1); printf("min %4.1f dBm max %3.1f dBm reg %2d dBm", c->ic_minpower/2., c->ic_maxpower/2., c->ic_maxregpower); /* indicate where regulatory cap limits power use */ if (c->ic_maxpower > 2*c->ic_maxregpower) printf(" <"); } static void list_txpow(int s) { struct ieee80211req_chaninfo *achans; uint8_t reported[IEEE80211_CHAN_BYTES]; struct ieee80211_channel *c, *prev; int i, half; getchaninfo(s); achans = malloc(IEEE80211_CHANINFO_SPACE(chaninfo)); if (achans == NULL) errx(1, "no space for active channel list"); achans->ic_nchans = 0; memset(reported, 0, sizeof(reported)); for (i = 0; i < chaninfo->ic_nchans; i++) { c = &chaninfo->ic_chans[i]; /* suppress duplicates as above */ if (isset(reported, c->ic_ieee) && !verbose) { /* XXX we assume duplicates are adjacent */ assert(achans->ic_nchans > 0); prev = &achans->ic_chans[achans->ic_nchans-1]; /* display highest power on channel */ if (c->ic_maxpower > prev->ic_maxpower) *prev = *c; } else { achans->ic_chans[achans->ic_nchans++] = *c; setbit(reported, c->ic_ieee); } } if (!verbose) { half = achans->ic_nchans / 2; if (achans->ic_nchans % 2) half++; for (i = 0; i < achans->ic_nchans / 2; i++) { print_txpow(&achans->ic_chans[i]); print_txpow(&achans->ic_chans[half+i]); printf("\n"); } if (achans->ic_nchans % 2) { print_txpow(&achans->ic_chans[i]); printf("\n"); } } else { for (i = 0; i < achans->ic_nchans; i++) { print_txpow_verbose(&achans->ic_chans[i]); printf("\n"); } } free(achans); } static void list_keys(int s) { } static void list_capabilities(int s) { struct ieee80211_devcaps_req *dc; if (verbose) dc = malloc(IEEE80211_DEVCAPS_SIZE(MAXCHAN)); else dc = malloc(IEEE80211_DEVCAPS_SIZE(1)); if (dc == NULL) errx(1, "no space for device capabilities"); dc->dc_chaninfo.ic_nchans = verbose ? 
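/*
 * print_channels() and list_txpow() above suppress duplicate channel numbers
 * with a byte-array bitmap: setbit()/isset() from <sys/param.h> mark each
 * IEEE channel number the first time it is seen, and a repeat either
 * overwrites the previously kept entry or is skipped. The core bookkeeping
 * in isolation; keep_first() is illustrative, not part of this file.
 */
static int
keep_first(uint8_t seen[IEEE80211_CHAN_BYTES], int chan)
{
	if (isset(seen, chan))
		return 0;	/* duplicate: caller may merge or replace */
	setbit(seen, chan);
	return 1;		/* first sighting: caller appends */
}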
MAXCHAN : 1; getdevcaps(s, dc); printb("drivercaps", dc->dc_drivercaps, IEEE80211_C_BITS); if (dc->dc_cryptocaps != 0 || verbose) { putchar('\n'); printb("cryptocaps", dc->dc_cryptocaps, IEEE80211_CRYPTO_BITS); } if (dc->dc_htcaps != 0 || verbose) { putchar('\n'); printb("htcaps", dc->dc_htcaps, IEEE80211_HTCAP_BITS); } if (dc->dc_vhtcaps != 0 || verbose) { putchar('\n'); printb("vhtcaps", dc->dc_vhtcaps, IEEE80211_VHTCAP_BITS); } putchar('\n'); if (verbose) { chaninfo = &dc->dc_chaninfo; /* XXX */ print_channels(s, &dc->dc_chaninfo, 1/*allchans*/, verbose); } free(dc); } static int get80211wme(int s, int param, int ac, int *val) { struct ieee80211req ireq; (void) memset(&ireq, 0, sizeof(ireq)); (void) strlcpy(ireq.i_name, name, sizeof(ireq.i_name)); ireq.i_type = param; ireq.i_len = ac; if (ioctl(s, SIOCG80211, &ireq) < 0) { warn("cannot get WME parameter %d, ac %d%s", param, ac & IEEE80211_WMEPARAM_VAL, ac & IEEE80211_WMEPARAM_BSS ? " (BSS)" : ""); return -1; } *val = ireq.i_val; return 0; } static void list_wme_aci(int s, const char *tag, int ac) { int val; printf("\t%s", tag); /* show WME BSS parameters */ if (get80211wme(s, IEEE80211_IOC_WME_CWMIN, ac, &val) != -1) printf(" cwmin %2u", val); if (get80211wme(s, IEEE80211_IOC_WME_CWMAX, ac, &val) != -1) printf(" cwmax %2u", val); if (get80211wme(s, IEEE80211_IOC_WME_AIFS, ac, &val) != -1) printf(" aifs %2u", val); if (get80211wme(s, IEEE80211_IOC_WME_TXOPLIMIT, ac, &val) != -1) printf(" txopLimit %3u", val); if (get80211wme(s, IEEE80211_IOC_WME_ACM, ac, &val) != -1) { if (val) printf(" acm"); else if (verbose) printf(" -acm"); } /* !BSS only */ if ((ac & IEEE80211_WMEPARAM_BSS) == 0) { if (get80211wme(s, IEEE80211_IOC_WME_ACKPOLICY, ac, &val) != -1) { if (!val) printf(" -ack"); else if (verbose) printf(" ack"); } } printf("\n"); } static void list_wme(int s) { static const char *acnames[] = { "AC_BE", "AC_BK", "AC_VI", "AC_VO" }; int ac; if (verbose) { /* display both BSS and local settings */ for (ac = WME_AC_BE; ac <= WME_AC_VO; ac++) { again: if (ac & IEEE80211_WMEPARAM_BSS) list_wme_aci(s, " ", ac); else list_wme_aci(s, acnames[ac], ac); if ((ac & IEEE80211_WMEPARAM_BSS) == 0) { ac |= IEEE80211_WMEPARAM_BSS; goto again; } else ac &= ~IEEE80211_WMEPARAM_BSS; } } else { /* display only channel settings */ for (ac = WME_AC_BE; ac <= WME_AC_VO; ac++) list_wme_aci(s, acnames[ac], ac); } } static void list_roam(int s) { const struct ieee80211_roamparam *rp; int mode; getroam(s); for (mode = IEEE80211_MODE_11A; mode < IEEE80211_MODE_MAX; mode++) { rp = &roamparams.params[mode]; if (rp->rssi == 0 && rp->rate == 0) continue; if (mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG) { if (rp->rssi & 1) LINE_CHECK("roam:%-7.7s rssi %2u.5dBm MCS %2u ", modename[mode], rp->rssi/2, rp->rate &~ IEEE80211_RATE_MCS); else LINE_CHECK("roam:%-7.7s rssi %4udBm MCS %2u ", modename[mode], rp->rssi/2, rp->rate &~ IEEE80211_RATE_MCS); } else { if (rp->rssi & 1) LINE_CHECK("roam:%-7.7s rssi %2u.5dBm rate %2u Mb/s", modename[mode], rp->rssi/2, rp->rate/2); else LINE_CHECK("roam:%-7.7s rssi %4udBm rate %2u Mb/s", modename[mode], rp->rssi/2, rp->rate/2); } } } static void list_txparams(int s) { const struct ieee80211_txparam *tp; int mode; gettxparams(s); for (mode = IEEE80211_MODE_11A; mode < IEEE80211_MODE_MAX; mode++) { tp = &txparams.params[mode]; if (tp->mgmtrate == 0 && tp->mcastrate == 0) continue; if (mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG) { if (tp->ucastrate == IEEE80211_FIXED_RATE_NONE) LINE_CHECK("%-7.7s ucast NONE mgmt 
%2u MCS " "mcast %2u MCS maxretry %u", modename[mode], tp->mgmtrate &~ IEEE80211_RATE_MCS, tp->mcastrate &~ IEEE80211_RATE_MCS, tp->maxretry); else LINE_CHECK("%-7.7s ucast %2u MCS mgmt %2u MCS " "mcast %2u MCS maxretry %u", modename[mode], tp->ucastrate &~ IEEE80211_RATE_MCS, tp->mgmtrate &~ IEEE80211_RATE_MCS, tp->mcastrate &~ IEEE80211_RATE_MCS, tp->maxretry); } else { if (tp->ucastrate == IEEE80211_FIXED_RATE_NONE) LINE_CHECK("%-7.7s ucast NONE mgmt %2u Mb/s " "mcast %2u Mb/s maxretry %u", modename[mode], tp->mgmtrate/2, tp->mcastrate/2, tp->maxretry); else LINE_CHECK("%-7.7s ucast %2u Mb/s mgmt %2u Mb/s " "mcast %2u Mb/s maxretry %u", modename[mode], tp->ucastrate/2, tp->mgmtrate/2, tp->mcastrate/2, tp->maxretry); } } } static void printpolicy(int policy) { switch (policy) { case IEEE80211_MACCMD_POLICY_OPEN: printf("policy: open\n"); break; case IEEE80211_MACCMD_POLICY_ALLOW: printf("policy: allow\n"); break; case IEEE80211_MACCMD_POLICY_DENY: printf("policy: deny\n"); break; case IEEE80211_MACCMD_POLICY_RADIUS: printf("policy: radius\n"); break; default: printf("policy: unknown (%u)\n", policy); break; } } static void list_mac(int s) { struct ieee80211req ireq; struct ieee80211req_maclist *acllist; int i, nacls, policy, len; uint8_t *data; char c; (void) memset(&ireq, 0, sizeof(ireq)); (void) strlcpy(ireq.i_name, name, sizeof(ireq.i_name)); /* XXX ?? */ ireq.i_type = IEEE80211_IOC_MACCMD; ireq.i_val = IEEE80211_MACCMD_POLICY; if (ioctl(s, SIOCG80211, &ireq) < 0) { if (errno == EINVAL) { printf("No acl policy loaded\n"); return; } err(1, "unable to get mac policy"); } policy = ireq.i_val; if (policy == IEEE80211_MACCMD_POLICY_OPEN) { c = '*'; } else if (policy == IEEE80211_MACCMD_POLICY_ALLOW) { c = '+'; } else if (policy == IEEE80211_MACCMD_POLICY_DENY) { c = '-'; } else if (policy == IEEE80211_MACCMD_POLICY_RADIUS) { c = 'r'; /* NB: should never have entries */ } else { printf("policy: unknown (%u)\n", policy); c = '?'; } if (verbose || c == '?') printpolicy(policy); ireq.i_val = IEEE80211_MACCMD_LIST; ireq.i_len = 0; if (ioctl(s, SIOCG80211, &ireq) < 0) err(1, "unable to get mac acl list size"); if (ireq.i_len == 0) { /* NB: no acls */ if (!(verbose || c == '?')) printpolicy(policy); return; } len = ireq.i_len; data = malloc(len); if (data == NULL) err(1, "out of memory for acl list"); ireq.i_data = data; if (ioctl(s, SIOCG80211, &ireq) < 0) err(1, "unable to get mac acl list"); nacls = len / sizeof(*acllist); acllist = (struct ieee80211req_maclist *) data; for (i = 0; i < nacls; i++) printf("%c%s\n", c, ether_ntoa( (const struct ether_addr *) acllist[i].ml_macaddr)); free(data); } static void print_regdomain(const struct ieee80211_regdomain *reg, int verb) { if ((reg->regdomain != 0 && reg->regdomain != reg->country) || verb) { const struct regdomain *rd = lib80211_regdomain_findbysku(getregdata(), reg->regdomain); if (rd == NULL) LINE_CHECK("regdomain %d", reg->regdomain); else LINE_CHECK("regdomain %s", rd->name); } if (reg->country != 0 || verb) { const struct country *cc = lib80211_country_findbycc(getregdata(), reg->country); if (cc == NULL) LINE_CHECK("country %d", reg->country); else LINE_CHECK("country %s", cc->isoname); } if (reg->location == 'I') LINE_CHECK("indoor"); else if (reg->location == 'O') LINE_CHECK("outdoor"); else if (verb) LINE_CHECK("anywhere"); if (reg->ecm) LINE_CHECK("ecm"); else if (verb) LINE_CHECK("-ecm"); } static void list_regdomain(int s, int channelsalso) { getregdomain(s); if (channelsalso) { getchaninfo(s); spacer = ':'; 
print_regdomain(&regdomain, 1); LINE_BREAK(); print_channels(s, chaninfo, 1/*allchans*/, 1/*verbose*/); } else print_regdomain(&regdomain, verbose); } static void list_mesh(int s) { struct ieee80211req ireq; struct ieee80211req_mesh_route routes[128]; struct ieee80211req_mesh_route *rt; (void) memset(&ireq, 0, sizeof(ireq)); (void) strlcpy(ireq.i_name, name, sizeof(ireq.i_name)); ireq.i_type = IEEE80211_IOC_MESH_RTCMD; ireq.i_val = IEEE80211_MESH_RTCMD_LIST; ireq.i_data = &routes; ireq.i_len = sizeof(routes); if (ioctl(s, SIOCG80211, &ireq) < 0) err(1, "unable to get the Mesh routing table"); printf("%-17.17s %-17.17s %4s %4s %4s %6s %s\n" , "DEST" , "NEXT HOP" , "HOPS" , "METRIC" , "LIFETIME" , "MSEQ" , "FLAGS"); for (rt = &routes[0]; rt - &routes[0] < ireq.i_len / sizeof(*rt); rt++){ printf("%s ", ether_ntoa((const struct ether_addr *)rt->imr_dest)); printf("%s %4u %4u %6u %6u %c%c\n", ether_ntoa((const struct ether_addr *)rt->imr_nexthop), rt->imr_nhops, rt->imr_metric, rt->imr_lifetime, rt->imr_lastmseq, (rt->imr_flags & IEEE80211_MESHRT_FLAGS_DISCOVER) ? 'D' : (rt->imr_flags & IEEE80211_MESHRT_FLAGS_VALID) ? 'V' : '!', (rt->imr_flags & IEEE80211_MESHRT_FLAGS_PROXY) ? 'P' : (rt->imr_flags & IEEE80211_MESHRT_FLAGS_GATE) ? 'G' :' '); } } static DECL_CMD_FUNC(set80211list, arg, d) { #define iseq(a,b) (strncasecmp(a,b,sizeof(b)-1) == 0) LINE_INIT('\t'); if (iseq(arg, "sta")) list_stations(s); else if (iseq(arg, "scan") || iseq(arg, "ap")) list_scan(s); else if (iseq(arg, "chan") || iseq(arg, "freq")) list_channels(s, 1); else if (iseq(arg, "active")) list_channels(s, 0); else if (iseq(arg, "keys")) list_keys(s); else if (iseq(arg, "caps")) list_capabilities(s); else if (iseq(arg, "wme") || iseq(arg, "wmm")) list_wme(s); else if (iseq(arg, "mac")) list_mac(s); else if (iseq(arg, "txpow")) list_txpow(s); else if (iseq(arg, "roam")) list_roam(s); else if (iseq(arg, "txparam") || iseq(arg, "txparm")) list_txparams(s); else if (iseq(arg, "regdomain")) list_regdomain(s, 1); else if (iseq(arg, "countries")) list_countries(); else if (iseq(arg, "mesh")) list_mesh(s); else errx(1, "Don't know how to list %s for %s", arg, name); LINE_BREAK(); #undef iseq } static enum ieee80211_opmode get80211opmode(int s) { struct ifmediareq ifmr; (void) memset(&ifmr, 0, sizeof(ifmr)); (void) strlcpy(ifmr.ifm_name, name, sizeof(ifmr.ifm_name)); if (ioctl(s, SIOCGIFMEDIA, (caddr_t)&ifmr) >= 0) { if (ifmr.ifm_current & IFM_IEEE80211_ADHOC) { if (ifmr.ifm_current & IFM_FLAG0) return IEEE80211_M_AHDEMO; else return IEEE80211_M_IBSS; } if (ifmr.ifm_current & IFM_IEEE80211_HOSTAP) return IEEE80211_M_HOSTAP; if (ifmr.ifm_current & IFM_IEEE80211_IBSS) return IEEE80211_M_IBSS; if (ifmr.ifm_current & IFM_IEEE80211_MONITOR) return IEEE80211_M_MONITOR; if (ifmr.ifm_current & IFM_IEEE80211_MBSS) return IEEE80211_M_MBSS; } return IEEE80211_M_STA; } #if 0 static void printcipher(int s, struct ieee80211req *ireq, int keylenop) { switch (ireq->i_val) { case IEEE80211_CIPHER_WEP: ireq->i_type = keylenop; if (ioctl(s, SIOCG80211, ireq) != -1) printf("WEP-%s", ireq->i_len <= 5 ? "40" : ireq->i_len <= 13 ?
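/* key lengths 5/13 bytes => WEP-40/WEP-104; longer => WEP-128 */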
"104" : "128"); else printf("WEP"); break; case IEEE80211_CIPHER_TKIP: printf("TKIP"); break; case IEEE80211_CIPHER_AES_OCB: printf("AES-OCB"); break; case IEEE80211_CIPHER_AES_CCM: printf("AES-CCM"); break; case IEEE80211_CIPHER_CKIP: printf("CKIP"); break; case IEEE80211_CIPHER_NONE: printf("NONE"); break; default: printf("UNKNOWN (0x%x)", ireq->i_val); break; } } #endif static void printkey(const struct ieee80211req_key *ik) { static const uint8_t zerodata[IEEE80211_KEYBUF_SIZE]; u_int keylen = ik->ik_keylen; int printcontents; printcontents = printkeys && (memcmp(ik->ik_keydata, zerodata, keylen) != 0 || verbose); if (printcontents) LINE_BREAK(); switch (ik->ik_type) { case IEEE80211_CIPHER_WEP: /* compatibility */ LINE_CHECK("wepkey %u:%s", ik->ik_keyix+1, keylen <= 5 ? "40-bit" : keylen <= 13 ? "104-bit" : "128-bit"); break; case IEEE80211_CIPHER_TKIP: if (keylen > 128/8) keylen -= 128/8; /* ignore MIC for now */ LINE_CHECK("TKIP %u:%u-bit", ik->ik_keyix+1, 8*keylen); break; case IEEE80211_CIPHER_AES_OCB: LINE_CHECK("AES-OCB %u:%u-bit", ik->ik_keyix+1, 8*keylen); break; case IEEE80211_CIPHER_AES_CCM: LINE_CHECK("AES-CCM %u:%u-bit", ik->ik_keyix+1, 8*keylen); break; case IEEE80211_CIPHER_CKIP: LINE_CHECK("CKIP %u:%u-bit", ik->ik_keyix+1, 8*keylen); break; case IEEE80211_CIPHER_NONE: LINE_CHECK("NULL %u:%u-bit", ik->ik_keyix+1, 8*keylen); break; default: LINE_CHECK("UNKNOWN (0x%x) %u:%u-bit", ik->ik_type, ik->ik_keyix+1, 8*keylen); break; } if (printcontents) { u_int i; printf(" <"); for (i = 0; i < keylen; i++) printf("%02x", ik->ik_keydata[i]); printf(">"); if (ik->ik_type != IEEE80211_CIPHER_WEP && (ik->ik_keyrsc != 0 || verbose)) printf(" rsc %ju", (uintmax_t)ik->ik_keyrsc); if (ik->ik_type != IEEE80211_CIPHER_WEP && (ik->ik_keytsc != 0 || verbose)) printf(" tsc %ju", (uintmax_t)ik->ik_keytsc); if (ik->ik_flags != 0 && verbose) { const char *sep = " "; if (ik->ik_flags & IEEE80211_KEY_XMIT) printf("%stx", sep), sep = "+"; if (ik->ik_flags & IEEE80211_KEY_RECV) printf("%srx", sep), sep = "+"; if (ik->ik_flags & IEEE80211_KEY_DEFAULT) printf("%sdef", sep), sep = "+"; } LINE_BREAK(); } } static void printrate(const char *tag, int v, int defrate, int defmcs) { if ((v & IEEE80211_RATE_MCS) == 0) { if (v != defrate) { if (v & 1) LINE_CHECK("%s %d.5", tag, v/2); else LINE_CHECK("%s %d", tag, v/2); } } else { if (v != defmcs) LINE_CHECK("%s %d", tag, v &~ 0x80); } } static int getid(int s, int ix, void *data, size_t len, int *plen, int mesh) { struct ieee80211req ireq; (void) memset(&ireq, 0, sizeof(ireq)); (void) strlcpy(ireq.i_name, name, sizeof(ireq.i_name)); ireq.i_type = (!mesh) ? IEEE80211_IOC_SSID : IEEE80211_IOC_MESH_ID; ireq.i_val = ix; ireq.i_data = data; ireq.i_len = len; if (ioctl(s, SIOCG80211, &ireq) < 0) return -1; *plen = ireq.i_len; return 0; } static void ieee80211_status(int s) { static const uint8_t zerobssid[IEEE80211_ADDR_LEN]; enum ieee80211_opmode opmode = get80211opmode(s); int i, num, wpa, wme, bgscan, bgscaninterval, val, len, wepmode; uint8_t data[32]; const struct ieee80211_channel *c; const struct ieee80211_roamparam *rp; const struct ieee80211_txparam *tp; if (getid(s, -1, data, sizeof(data), &len, 0) < 0) { /* If we can't get the SSID, this isn't an 802.11 device. */ return; } /* * Invalidate cached state so printing status for multiple * if's doesn't reuse the first interfaces' cached state. 
*/ gotcurchan = 0; gotroam = 0; gottxparams = 0; gothtconf = 0; gotregdomain = 0; printf("\t"); if (opmode == IEEE80211_M_MBSS) { printf("meshid "); getid(s, 0, data, sizeof(data), &len, 1); print_string(data, len); } else { if (get80211val(s, IEEE80211_IOC_NUMSSIDS, &num) < 0) num = 0; printf("ssid "); if (num > 1) { for (i = 0; i < num; i++) { if (getid(s, i, data, sizeof(data), &len, 0) >= 0 && len > 0) { printf(" %d:", i + 1); print_string(data, len); } } } else print_string(data, len); } c = getcurchan(s); if (c->ic_freq != IEEE80211_CHAN_ANY) { char buf[14]; printf(" channel %d (%u MHz%s)", c->ic_ieee, c->ic_freq, get_chaninfo(c, 1, buf, sizeof(buf))); } else if (verbose) printf(" channel UNDEF"); if (get80211(s, IEEE80211_IOC_BSSID, data, IEEE80211_ADDR_LEN) >= 0 && (memcmp(data, zerobssid, sizeof(zerobssid)) != 0 || verbose)) printf(" bssid %s", ether_ntoa((struct ether_addr *)data)); if (get80211len(s, IEEE80211_IOC_STATIONNAME, data, sizeof(data), &len) != -1) { printf("\n\tstationname "); print_string(data, len); } spacer = ' '; /* force first break */ LINE_BREAK(); list_regdomain(s, 0); wpa = 0; if (get80211val(s, IEEE80211_IOC_AUTHMODE, &val) != -1) { switch (val) { case IEEE80211_AUTH_NONE: LINE_CHECK("authmode NONE"); break; case IEEE80211_AUTH_OPEN: LINE_CHECK("authmode OPEN"); break; case IEEE80211_AUTH_SHARED: LINE_CHECK("authmode SHARED"); break; case IEEE80211_AUTH_8021X: LINE_CHECK("authmode 802.1x"); break; case IEEE80211_AUTH_WPA: if (get80211val(s, IEEE80211_IOC_WPA, &wpa) < 0) wpa = 1; /* default to WPA1 */ switch (wpa) { case 2: LINE_CHECK("authmode WPA2/802.11i"); break; case 3: LINE_CHECK("authmode WPA1+WPA2/802.11i"); break; default: LINE_CHECK("authmode WPA"); break; } break; case IEEE80211_AUTH_AUTO: LINE_CHECK("authmode AUTO"); break; default: LINE_CHECK("authmode UNKNOWN (0x%x)", val); break; } } if (wpa || verbose) { if (get80211val(s, IEEE80211_IOC_WPS, &val) != -1) { if (val) LINE_CHECK("wps"); else if (verbose) LINE_CHECK("-wps"); } if (get80211val(s, IEEE80211_IOC_TSN, &val) != -1) { if (val) LINE_CHECK("tsn"); else if (verbose) LINE_CHECK("-tsn"); } if (get80211val(s, IEEE80211_IOC_COUNTERMEASURES, &val) != -1) { if (val) LINE_CHECK("countermeasures"); else if (verbose) LINE_CHECK("-countermeasures"); } #if 0 /* XXX not interesting with WPA done in user space */ ireq.i_type = IEEE80211_IOC_KEYMGTALGS; if (ioctl(s, SIOCG80211, &ireq) != -1) { } ireq.i_type = IEEE80211_IOC_MCASTCIPHER; if (ioctl(s, SIOCG80211, &ireq) != -1) { LINE_CHECK("mcastcipher "); printcipher(s, &ireq, IEEE80211_IOC_MCASTKEYLEN); spacer = ' '; } ireq.i_type = IEEE80211_IOC_UCASTCIPHER; if (ioctl(s, SIOCG80211, &ireq) != -1) { LINE_CHECK("ucastcipher "); printcipher(s, &ireq, IEEE80211_IOC_UCASTKEYLEN); } if (wpa & 2) { ireq.i_type = IEEE80211_IOC_RSNCAPS; if (ioctl(s, SIOCG80211, &ireq) != -1) { LINE_CHECK("RSN caps 0x%x", ireq.i_val); spacer = ' '; } } ireq.i_type = IEEE80211_IOC_UCASTCIPHERS; if (ioctl(s, SIOCG80211, &ireq) != -1) { } #endif } if (get80211val(s, IEEE80211_IOC_WEP, &wepmode) != -1 && wepmode != IEEE80211_WEP_NOSUP) { switch (wepmode) { case IEEE80211_WEP_OFF: LINE_CHECK("privacy OFF"); break; case IEEE80211_WEP_ON: LINE_CHECK("privacy ON"); break; case IEEE80211_WEP_MIXED: LINE_CHECK("privacy MIXED"); break; default: LINE_CHECK("privacy UNKNOWN (0x%x)", wepmode); break; } /* * If we get here then we've got WEP support so we need * to print WEP status.
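* The loop below fetches each key slot via IEEE80211_IOC_WPAKEY and * prints any key with a non-zero length.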
*/ if (get80211val(s, IEEE80211_IOC_WEPTXKEY, &val) < 0) { warn("WEP support, but no tx key!"); goto end; } if (val != -1) LINE_CHECK("deftxkey %d", val+1); else if (wepmode != IEEE80211_WEP_OFF || verbose) LINE_CHECK("deftxkey UNDEF"); if (get80211val(s, IEEE80211_IOC_NUMWEPKEYS, &num) < 0) { warn("WEP support, but no NUMWEPKEYS support!"); goto end; } for (i = 0; i < num; i++) { struct ieee80211req_key ik; memset(&ik, 0, sizeof(ik)); ik.ik_keyix = i; if (get80211(s, IEEE80211_IOC_WPAKEY, &ik, sizeof(ik)) < 0) { warn("WEP support, but cannot get keys!"); goto end; } if (ik.ik_keylen != 0) { if (verbose) LINE_BREAK(); printkey(&ik); } } end: ; } if (get80211val(s, IEEE80211_IOC_POWERSAVE, &val) != -1 && val != IEEE80211_POWERSAVE_NOSUP ) { if (val != IEEE80211_POWERSAVE_OFF || verbose) { switch (val) { case IEEE80211_POWERSAVE_OFF: LINE_CHECK("powersavemode OFF"); break; case IEEE80211_POWERSAVE_CAM: LINE_CHECK("powersavemode CAM"); break; case IEEE80211_POWERSAVE_PSP: LINE_CHECK("powersavemode PSP"); break; case IEEE80211_POWERSAVE_PSP_CAM: LINE_CHECK("powersavemode PSP-CAM"); break; } if (get80211val(s, IEEE80211_IOC_POWERSAVESLEEP, &val) != -1) LINE_CHECK("powersavesleep %d", val); } } if (get80211val(s, IEEE80211_IOC_TXPOWER, &val) != -1) { if (val & 1) LINE_CHECK("txpower %d.5", val/2); else LINE_CHECK("txpower %d", val/2); } if (verbose) { if (get80211val(s, IEEE80211_IOC_TXPOWMAX, &val) != -1) LINE_CHECK("txpowmax %.1f", val/2.); } if (get80211val(s, IEEE80211_IOC_DOTD, &val) != -1) { if (val) LINE_CHECK("dotd"); else if (verbose) LINE_CHECK("-dotd"); } if (get80211val(s, IEEE80211_IOC_RTSTHRESHOLD, &val) != -1) { if (val != IEEE80211_RTS_MAX || verbose) LINE_CHECK("rtsthreshold %d", val); } if (get80211val(s, IEEE80211_IOC_FRAGTHRESHOLD, &val) != -1) { if (val != IEEE80211_FRAG_MAX || verbose) LINE_CHECK("fragthreshold %d", val); } if (opmode == IEEE80211_M_STA || verbose) { if (get80211val(s, IEEE80211_IOC_BMISSTHRESHOLD, &val) != -1) { if (val != IEEE80211_HWBMISS_MAX || verbose) LINE_CHECK("bmiss %d", val); } } if (!verbose) { gettxparams(s); tp = &txparams.params[chan2mode(c)]; printrate("ucastrate", tp->ucastrate, IEEE80211_FIXED_RATE_NONE, IEEE80211_FIXED_RATE_NONE); printrate("mcastrate", tp->mcastrate, 2*1, IEEE80211_RATE_MCS|0); printrate("mgmtrate", tp->mgmtrate, 2*1, IEEE80211_RATE_MCS|0); if (tp->maxretry != 6) /* XXX */ LINE_CHECK("maxretry %d", tp->maxretry); } else { LINE_BREAK(); list_txparams(s); } bgscaninterval = -1; (void) get80211val(s, IEEE80211_IOC_BGSCAN_INTERVAL, &bgscaninterval); if (get80211val(s, IEEE80211_IOC_SCANVALID, &val) != -1) { if (val != bgscaninterval || verbose) LINE_CHECK("scanvalid %u", val); } bgscan = 0; if (get80211val(s, IEEE80211_IOC_BGSCAN, &bgscan) != -1) { if (bgscan) LINE_CHECK("bgscan"); else if (verbose) LINE_CHECK("-bgscan"); } if (bgscan || verbose) { if (bgscaninterval != -1) LINE_CHECK("bgscanintvl %u", bgscaninterval); if (get80211val(s, IEEE80211_IOC_BGSCAN_IDLE, &val) != -1) LINE_CHECK("bgscanidle %u", val); if (!verbose) { getroam(s); rp = &roamparams.params[chan2mode(c)]; if (rp->rssi & 1) LINE_CHECK("roam:rssi %u.5", rp->rssi/2); else LINE_CHECK("roam:rssi %u", rp->rssi/2); LINE_CHECK("roam:rate %u", rp->rate/2); } else { LINE_BREAK(); list_roam(s); LINE_BREAK(); } } if (IEEE80211_IS_CHAN_ANYG(c) || verbose) { if (get80211val(s, IEEE80211_IOC_PUREG, &val) != -1) { if (val) LINE_CHECK("pureg"); else if (verbose) LINE_CHECK("-pureg"); } if (get80211val(s, IEEE80211_IOC_PROTMODE, &val) != -1) { switch (val) { case
IEEE80211_PROTMODE_OFF: LINE_CHECK("protmode OFF"); break; case IEEE80211_PROTMODE_CTS: LINE_CHECK("protmode CTS"); break; case IEEE80211_PROTMODE_RTSCTS: LINE_CHECK("protmode RTSCTS"); break; default: LINE_CHECK("protmode UNKNOWN (0x%x)", val); break; } } } if (IEEE80211_IS_CHAN_HT(c) || verbose) { gethtconf(s); switch (htconf & 3) { case 0: case 2: LINE_CHECK("-ht"); break; case 1: LINE_CHECK("ht20"); break; case 3: if (verbose) LINE_CHECK("ht"); break; } if (get80211val(s, IEEE80211_IOC_HTCOMPAT, &val) != -1) { if (!val) LINE_CHECK("-htcompat"); else if (verbose) LINE_CHECK("htcompat"); } if (get80211val(s, IEEE80211_IOC_AMPDU, &val) != -1) { switch (val) { case 0: LINE_CHECK("-ampdu"); break; case 1: LINE_CHECK("ampdutx -ampdurx"); break; case 2: LINE_CHECK("-ampdutx ampdurx"); break; case 3: if (verbose) LINE_CHECK("ampdu"); break; } } /* XXX 11ac density/size is different */ if (get80211val(s, IEEE80211_IOC_AMPDU_LIMIT, &val) != -1) { switch (val) { case IEEE80211_HTCAP_MAXRXAMPDU_8K: LINE_CHECK("ampdulimit 8k"); break; case IEEE80211_HTCAP_MAXRXAMPDU_16K: LINE_CHECK("ampdulimit 16k"); break; case IEEE80211_HTCAP_MAXRXAMPDU_32K: LINE_CHECK("ampdulimit 32k"); break; case IEEE80211_HTCAP_MAXRXAMPDU_64K: LINE_CHECK("ampdulimit 64k"); break; } } /* XXX 11ac density/size is different */ if (get80211val(s, IEEE80211_IOC_AMPDU_DENSITY, &val) != -1) { switch (val) { case IEEE80211_HTCAP_MPDUDENSITY_NA: if (verbose) LINE_CHECK("ampdudensity NA"); break; case IEEE80211_HTCAP_MPDUDENSITY_025: LINE_CHECK("ampdudensity .25"); break; case IEEE80211_HTCAP_MPDUDENSITY_05: LINE_CHECK("ampdudensity .5"); break; case IEEE80211_HTCAP_MPDUDENSITY_1: LINE_CHECK("ampdudensity 1"); break; case IEEE80211_HTCAP_MPDUDENSITY_2: LINE_CHECK("ampdudensity 2"); break; case IEEE80211_HTCAP_MPDUDENSITY_4: LINE_CHECK("ampdudensity 4"); break; case IEEE80211_HTCAP_MPDUDENSITY_8: LINE_CHECK("ampdudensity 8"); break; case IEEE80211_HTCAP_MPDUDENSITY_16: LINE_CHECK("ampdudensity 16"); break; } } if (get80211val(s, IEEE80211_IOC_AMSDU, &val) != -1) { switch (val) { case 0: LINE_CHECK("-amsdu"); break; case 1: LINE_CHECK("amsdutx -amsdurx"); break; case 2: LINE_CHECK("-amsdutx amsdurx"); break; case 3: if (verbose) LINE_CHECK("amsdu"); break; } } /* XXX amsdu limit */ if (get80211val(s, IEEE80211_IOC_SHORTGI, &val) != -1) { if (val) LINE_CHECK("shortgi"); else if (verbose) LINE_CHECK("-shortgi"); } if (get80211val(s, IEEE80211_IOC_HTPROTMODE, &val) != -1) { if (val == IEEE80211_PROTMODE_OFF) LINE_CHECK("htprotmode OFF"); else if (val != IEEE80211_PROTMODE_RTSCTS) LINE_CHECK("htprotmode UNKNOWN (0x%x)", val); else if (verbose) LINE_CHECK("htprotmode RTSCTS"); } if (get80211val(s, IEEE80211_IOC_PUREN, &val) != -1) { if (val) LINE_CHECK("puren"); else if (verbose) LINE_CHECK("-puren"); } if (get80211val(s, IEEE80211_IOC_SMPS, &val) != -1) { if (val == IEEE80211_HTCAP_SMPS_DYNAMIC) LINE_CHECK("smpsdyn"); else if (val == IEEE80211_HTCAP_SMPS_ENA) LINE_CHECK("smps"); else if (verbose) LINE_CHECK("-smps"); } if (get80211val(s, IEEE80211_IOC_RIFS, &val) != -1) { if (val) LINE_CHECK("rifs"); else if (verbose) LINE_CHECK("-rifs"); } /* XXX VHT STBC? 
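(the switch below reports only the HT STBC tx/rx settings)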
*/ if (get80211val(s, IEEE80211_IOC_STBC, &val) != -1) { switch (val) { case 0: LINE_CHECK("-stbc"); break; case 1: LINE_CHECK("stbctx -stbcrx"); break; case 2: LINE_CHECK("-stbctx stbcrx"); break; case 3: if (verbose) LINE_CHECK("stbc"); break; } } if (get80211val(s, IEEE80211_IOC_LDPC, &val) != -1) { switch (val) { case 0: LINE_CHECK("-ldpc"); break; case 1: LINE_CHECK("ldpctx -ldpcrx"); break; case 2: LINE_CHECK("-ldpctx ldpcrx"); break; case 3: if (verbose) LINE_CHECK("ldpc"); break; } } } if (IEEE80211_IS_CHAN_VHT(c) || verbose) { getvhtconf(s); if (vhtconf & 0x1) LINE_CHECK("vht"); else LINE_CHECK("-vht"); if (vhtconf & 0x2) LINE_CHECK("vht40"); else LINE_CHECK("-vht40"); if (vhtconf & 0x4) LINE_CHECK("vht80"); else LINE_CHECK("-vht80"); if (vhtconf & 0x8) LINE_CHECK("vht80p80"); else LINE_CHECK("-vht80p80"); if (vhtconf & 0x10) LINE_CHECK("vht160"); else LINE_CHECK("-vht160"); } if (get80211val(s, IEEE80211_IOC_WME, &wme) != -1) { if (wme) LINE_CHECK("wme"); else if (verbose) LINE_CHECK("-wme"); } else wme = 0; if (get80211val(s, IEEE80211_IOC_BURST, &val) != -1) { if (val) LINE_CHECK("burst"); else if (verbose) LINE_CHECK("-burst"); } if (get80211val(s, IEEE80211_IOC_FF, &val) != -1) { if (val) LINE_CHECK("ff"); else if (verbose) LINE_CHECK("-ff"); } if (get80211val(s, IEEE80211_IOC_TURBOP, &val) != -1) { if (val) LINE_CHECK("dturbo"); else if (verbose) LINE_CHECK("-dturbo"); } if (get80211val(s, IEEE80211_IOC_DWDS, &val) != -1) { if (val) LINE_CHECK("dwds"); else if (verbose) LINE_CHECK("-dwds"); } if (opmode == IEEE80211_M_HOSTAP) { if (get80211val(s, IEEE80211_IOC_HIDESSID, &val) != -1) { if (val) LINE_CHECK("hidessid"); else if (verbose) LINE_CHECK("-hidessid"); } if (get80211val(s, IEEE80211_IOC_APBRIDGE, &val) != -1) { if (!val) LINE_CHECK("-apbridge"); else if (verbose) LINE_CHECK("apbridge"); } if (get80211val(s, IEEE80211_IOC_DTIM_PERIOD, &val) != -1) LINE_CHECK("dtimperiod %u", val); if (get80211val(s, IEEE80211_IOC_DOTH, &val) != -1) { if (!val) LINE_CHECK("-doth"); else if (verbose) LINE_CHECK("doth"); } if (get80211val(s, IEEE80211_IOC_DFS, &val) != -1) { if (!val) LINE_CHECK("-dfs"); else if (verbose) LINE_CHECK("dfs"); } if (get80211val(s, IEEE80211_IOC_INACTIVITY, &val) != -1) { if (!val) LINE_CHECK("-inact"); else if (verbose) LINE_CHECK("inact"); } } else { if (get80211val(s, IEEE80211_IOC_ROAMING, &val) != -1) { if (val != IEEE80211_ROAMING_AUTO || verbose) { switch (val) { case IEEE80211_ROAMING_DEVICE: LINE_CHECK("roaming DEVICE"); break; case IEEE80211_ROAMING_AUTO: LINE_CHECK("roaming AUTO"); break; case IEEE80211_ROAMING_MANUAL: LINE_CHECK("roaming MANUAL"); break; default: LINE_CHECK("roaming UNKNOWN (0x%x)", val); break; } } } } if (opmode == IEEE80211_M_AHDEMO) { if (get80211val(s, IEEE80211_IOC_TDMA_SLOT, &val) != -1) LINE_CHECK("tdmaslot %u", val); if (get80211val(s, IEEE80211_IOC_TDMA_SLOTCNT, &val) != -1) LINE_CHECK("tdmaslotcnt %u", val); if (get80211val(s, IEEE80211_IOC_TDMA_SLOTLEN, &val) != -1) LINE_CHECK("tdmaslotlen %u", val); if (get80211val(s, IEEE80211_IOC_TDMA_BINTERVAL, &val) != -1) LINE_CHECK("tdmabintval %u", val); } else if (get80211val(s, IEEE80211_IOC_BEACON_INTERVAL, &val) != -1) { /* XXX default define not visible */ if (val != 100 || verbose) LINE_CHECK("bintval %u", val); } if (wme && verbose) { LINE_BREAK(); list_wme(s); } if (opmode == IEEE80211_M_MBSS) { if (get80211val(s, IEEE80211_IOC_MESH_TTL, &val) != -1) { LINE_CHECK("meshttl %u", val); } if (get80211val(s, IEEE80211_IOC_MESH_AP, &val) != -1) { if (val) 
LINE_CHECK("meshpeering"); else LINE_CHECK("-meshpeering"); } if (get80211val(s, IEEE80211_IOC_MESH_FWRD, &val) != -1) { if (val) LINE_CHECK("meshforward"); else LINE_CHECK("-meshforward"); } if (get80211val(s, IEEE80211_IOC_MESH_GATE, &val) != -1) { if (val) LINE_CHECK("meshgate"); else LINE_CHECK("-meshgate"); } if (get80211len(s, IEEE80211_IOC_MESH_PR_METRIC, data, 12, &len) != -1) { data[len] = '\0'; LINE_CHECK("meshmetric %s", data); } if (get80211len(s, IEEE80211_IOC_MESH_PR_PATH, data, 12, &len) != -1) { data[len] = '\0'; LINE_CHECK("meshpath %s", data); } if (get80211val(s, IEEE80211_IOC_HWMP_ROOTMODE, &val) != -1) { switch (val) { case IEEE80211_HWMP_ROOTMODE_DISABLED: LINE_CHECK("hwmprootmode DISABLED"); break; case IEEE80211_HWMP_ROOTMODE_NORMAL: LINE_CHECK("hwmprootmode NORMAL"); break; case IEEE80211_HWMP_ROOTMODE_PROACTIVE: LINE_CHECK("hwmprootmode PROACTIVE"); break; case IEEE80211_HWMP_ROOTMODE_RANN: LINE_CHECK("hwmprootmode RANN"); break; default: LINE_CHECK("hwmprootmode UNKNOWN(%d)", val); break; } } if (get80211val(s, IEEE80211_IOC_HWMP_MAXHOPS, &val) != -1) { LINE_CHECK("hwmpmaxhops %u", val); } } LINE_BREAK(); } static int get80211(int s, int type, void *data, int len) { return (lib80211_get80211(s, name, type, data, len)); } static int get80211len(int s, int type, void *data, int len, int *plen) { return (lib80211_get80211len(s, name, type, data, len, plen)); } static int get80211val(int s, int type, int *val) { return (lib80211_get80211val(s, name, type, val)); } static void set80211(int s, int type, int val, int len, void *data) { int ret; ret = lib80211_set80211(s, name, type, val, len, data); if (ret < 0) err(1, "SIOCS80211"); } static const char * get_string(const char *val, const char *sep, u_int8_t *buf, int *lenp) { int len; int hexstr; u_int8_t *p; len = *lenp; p = buf; hexstr = (val[0] == '0' && tolower((u_char)val[1]) == 'x'); if (hexstr) val += 2; for (;;) { if (*val == '\0') break; if (sep != NULL && strchr(sep, *val) != NULL) { val++; break; } if (hexstr) { if (!isxdigit((u_char)val[0])) { warnx("bad hexadecimal digits"); return NULL; } if (!isxdigit((u_char)val[1])) { warnx("odd count hexadecimal digits"); return NULL; } } if (p >= buf + len) { if (hexstr) warnx("hexadecimal digits too long"); else warnx("string too long"); return NULL; } if (hexstr) { #define tohex(x) (isdigit(x) ? (x) - '0' : tolower(x) - 'a' + 10) *p++ = (tohex((u_char)val[0]) << 4) | tohex((u_char)val[1]); #undef tohex val += 2; } else *p++ = *val++; } len = p - buf; /* The string "-" is treated as the empty string. */ if (!hexstr && len == 1 && buf[0] == '-') { len = 0; memset(buf, 0, *lenp); } else if (len < *lenp) memset(p, 0, *lenp - len); *lenp = len; return val; } static void print_string(const u_int8_t *buf, int len) { int i; int hasspc; int utf8; i = 0; hasspc = 0; setlocale(LC_CTYPE, ""); utf8 = strncmp("UTF-8", nl_langinfo(CODESET), 5) == 0; for (; i < len; i++) { if (!isprint(buf[i]) && buf[i] != '\0' && !utf8) break; if (isspace(buf[i])) hasspc++; } if (i == len || utf8) { if (hasspc || len == 0 || buf[0] == '\0') printf("\"%.*s\"", len, buf); else printf("%.*s", len, buf); } else { printf("0x"); for (i = 0; i < len; i++) printf("%02x", buf[i]); } } static void setdefregdomain(int s) { struct regdata *rdp = getregdata(); const struct regdomain *rd; /* Check if regdomain/country was already set by a previous call. */ /* XXX is it possible? 
*/ if (regdomain.regdomain != 0 || regdomain.country != CTRY_DEFAULT) return; getregdomain(s); /* Check if it was already set by the driver. */ if (regdomain.regdomain != 0 || regdomain.country != CTRY_DEFAULT) return; /* Set FCC/US as default. */ rd = lib80211_regdomain_findbysku(rdp, SKU_FCC); if (rd == NULL) errx(1, "FCC regdomain was not found"); regdomain.regdomain = rd->sku; if (rd->cc != NULL) defaultcountry(rd); /* Send changes to net80211. */ setregdomain_cb(s, &regdomain); /* Cleanup (so it can be overridden by subsequent parameters). */ regdomain.regdomain = 0; regdomain.country = CTRY_DEFAULT; regdomain.isocc[0] = 0; regdomain.isocc[1] = 0; } /* * Virtual AP cloning support. */ static struct ieee80211_clone_params params = { .icp_opmode = IEEE80211_M_STA, /* default to station mode */ }; static void wlan_create(int s, struct ifreq *ifr) { static const uint8_t zerobssid[IEEE80211_ADDR_LEN]; char orig_name[IFNAMSIZ]; if (params.icp_parent[0] == '\0') errx(1, "must specify a parent device (wlandev) when creating " "a wlan device"); if (params.icp_opmode == IEEE80211_M_WDS && memcmp(params.icp_bssid, zerobssid, sizeof(zerobssid)) == 0) errx(1, "no bssid specified for WDS (use wlanbssid)"); ifr->ifr_data = (caddr_t) &params; if (ioctl(s, SIOCIFCREATE2, ifr) < 0) err(1, "SIOCIFCREATE2"); /* XXX preserve original name for ifclonecreate(). */ strlcpy(orig_name, name, sizeof(orig_name)); strlcpy(name, ifr->ifr_name, sizeof(name)); setdefregdomain(s); strlcpy(name, orig_name, sizeof(name)); } static DECL_CMD_FUNC(set80211clone_wlandev, arg, d) { strlcpy(params.icp_parent, arg, IFNAMSIZ); } static DECL_CMD_FUNC(set80211clone_wlanbssid, arg, d) { const struct ether_addr *ea; ea = ether_aton(arg); if (ea == NULL) errx(1, "%s: cannot parse bssid", arg); memcpy(params.icp_bssid, ea->octet, IEEE80211_ADDR_LEN); } static DECL_CMD_FUNC(set80211clone_wlanaddr, arg, d) { const struct ether_addr *ea; ea = ether_aton(arg); if (ea == NULL) errx(1, "%s: cannot parse address", arg); memcpy(params.icp_macaddr, ea->octet, IEEE80211_ADDR_LEN); params.icp_flags |= IEEE80211_CLONE_MACADDR; } static DECL_CMD_FUNC(set80211clone_wlanmode, arg, d) { #define iseq(a,b) (strncasecmp(a,b,sizeof(b)-1) == 0) if (iseq(arg, "sta")) params.icp_opmode = IEEE80211_M_STA; else if (iseq(arg, "ahdemo") || iseq(arg, "adhoc-demo")) params.icp_opmode = IEEE80211_M_AHDEMO; else if (iseq(arg, "ibss") || iseq(arg, "adhoc")) params.icp_opmode = IEEE80211_M_IBSS; else if (iseq(arg, "ap") || iseq(arg, "host")) params.icp_opmode = IEEE80211_M_HOSTAP; else if (iseq(arg, "wds")) params.icp_opmode = IEEE80211_M_WDS; else if (iseq(arg, "monitor")) params.icp_opmode = IEEE80211_M_MONITOR; else if (iseq(arg, "tdma")) { params.icp_opmode = IEEE80211_M_AHDEMO; params.icp_flags |= IEEE80211_CLONE_TDMA; } else if (iseq(arg, "mesh") || iseq(arg, "mp")) /* mesh point */ params.icp_opmode = IEEE80211_M_MBSS; else errx(1, "Don't know how to create %s for %s", arg, name); #undef iseq } static void set80211clone_beacons(const char *val, int d, int s, const struct afswtch *rafp) { /* NB: inverted sense */ if (d) params.icp_flags &= ~IEEE80211_CLONE_NOBEACONS; else params.icp_flags |= IEEE80211_CLONE_NOBEACONS; } static void set80211clone_bssid(const char *val, int d, int s, const struct afswtch *rafp) { if (d) params.icp_flags |= IEEE80211_CLONE_BSSID; else params.icp_flags &= ~IEEE80211_CLONE_BSSID; } static void set80211clone_wdslegacy(const char *val, int d, int s, const struct afswtch *rafp) { if (d) params.icp_flags |= IEEE80211_CLONE_WDSLEGACY; else
params.icp_flags &= ~IEEE80211_CLONE_WDSLEGACY; } static struct cmd ieee80211_cmds[] = { DEF_CMD_ARG("ssid", set80211ssid), DEF_CMD_ARG("nwid", set80211ssid), DEF_CMD_ARG("meshid", set80211meshid), DEF_CMD_ARG("stationname", set80211stationname), DEF_CMD_ARG("station", set80211stationname), /* BSD/OS */ DEF_CMD_ARG("channel", set80211channel), DEF_CMD_ARG("authmode", set80211authmode), DEF_CMD_ARG("powersavemode", set80211powersavemode), DEF_CMD("powersave", 1, set80211powersave), DEF_CMD("-powersave", 0, set80211powersave), DEF_CMD_ARG("powersavesleep", set80211powersavesleep), DEF_CMD_ARG("wepmode", set80211wepmode), DEF_CMD("wep", 1, set80211wep), DEF_CMD("-wep", 0, set80211wep), DEF_CMD_ARG("deftxkey", set80211weptxkey), DEF_CMD_ARG("weptxkey", set80211weptxkey), DEF_CMD_ARG("wepkey", set80211wepkey), DEF_CMD_ARG("nwkey", set80211nwkey), /* NetBSD */ DEF_CMD("-nwkey", 0, set80211wep), /* NetBSD */ DEF_CMD_ARG("rtsthreshold", set80211rtsthreshold), DEF_CMD_ARG("protmode", set80211protmode), DEF_CMD_ARG("txpower", set80211txpower), DEF_CMD_ARG("roaming", set80211roaming), DEF_CMD("wme", 1, set80211wme), DEF_CMD("-wme", 0, set80211wme), DEF_CMD("wmm", 1, set80211wme), DEF_CMD("-wmm", 0, set80211wme), DEF_CMD("hidessid", 1, set80211hidessid), DEF_CMD("-hidessid", 0, set80211hidessid), DEF_CMD("apbridge", 1, set80211apbridge), DEF_CMD("-apbridge", 0, set80211apbridge), DEF_CMD_ARG("chanlist", set80211chanlist), DEF_CMD_ARG("bssid", set80211bssid), DEF_CMD_ARG("ap", set80211bssid), DEF_CMD("scan", 0, set80211scan), DEF_CMD_ARG("list", set80211list), DEF_CMD_ARG2("cwmin", set80211cwmin), DEF_CMD_ARG2("cwmax", set80211cwmax), DEF_CMD_ARG2("aifs", set80211aifs), DEF_CMD_ARG2("txoplimit", set80211txoplimit), DEF_CMD_ARG("acm", set80211acm), DEF_CMD_ARG("-acm", set80211noacm), DEF_CMD_ARG("ack", set80211ackpolicy), DEF_CMD_ARG("-ack", set80211noackpolicy), DEF_CMD_ARG2("bss:cwmin", set80211bsscwmin), DEF_CMD_ARG2("bss:cwmax", set80211bsscwmax), DEF_CMD_ARG2("bss:aifs", set80211bssaifs), DEF_CMD_ARG2("bss:txoplimit", set80211bsstxoplimit), DEF_CMD_ARG("dtimperiod", set80211dtimperiod), DEF_CMD_ARG("bintval", set80211bintval), DEF_CMD("mac:open", IEEE80211_MACCMD_POLICY_OPEN, set80211maccmd), DEF_CMD("mac:allow", IEEE80211_MACCMD_POLICY_ALLOW, set80211maccmd), DEF_CMD("mac:deny", IEEE80211_MACCMD_POLICY_DENY, set80211maccmd), DEF_CMD("mac:radius", IEEE80211_MACCMD_POLICY_RADIUS, set80211maccmd), DEF_CMD("mac:flush", IEEE80211_MACCMD_FLUSH, set80211maccmd), DEF_CMD("mac:detach", IEEE80211_MACCMD_DETACH, set80211maccmd), DEF_CMD_ARG("mac:add", set80211addmac), DEF_CMD_ARG("mac:del", set80211delmac), DEF_CMD_ARG("mac:kick", set80211kickmac), DEF_CMD("pureg", 1, set80211pureg), DEF_CMD("-pureg", 0, set80211pureg), DEF_CMD("ff", 1, set80211fastframes), DEF_CMD("-ff", 0, set80211fastframes), DEF_CMD("dturbo", 1, set80211dturbo), DEF_CMD("-dturbo", 0, set80211dturbo), DEF_CMD("bgscan", 1, set80211bgscan), DEF_CMD("-bgscan", 0, set80211bgscan), DEF_CMD_ARG("bgscanidle", set80211bgscanidle), DEF_CMD_ARG("bgscanintvl", set80211bgscanintvl), DEF_CMD_ARG("scanvalid", set80211scanvalid), DEF_CMD("quiet", 1, set80211quiet), DEF_CMD("-quiet", 0, set80211quiet), DEF_CMD_ARG("quiet_count", set80211quietcount), DEF_CMD_ARG("quiet_period", set80211quietperiod), DEF_CMD_ARG("quiet_duration", set80211quietduration), DEF_CMD_ARG("quiet_offset", set80211quietoffset), DEF_CMD_ARG("roam:rssi", set80211roamrssi), DEF_CMD_ARG("roam:rate", set80211roamrate), DEF_CMD_ARG("mcastrate", set80211mcastrate), 
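/* NB: fixed rates are stored in half-Mb/s units; MCS rates have IEEE80211_RATE_MCS (0x80) set */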
DEF_CMD_ARG("ucastrate", set80211ucastrate), DEF_CMD_ARG("mgtrate", set80211mgtrate), DEF_CMD_ARG("mgmtrate", set80211mgtrate), DEF_CMD_ARG("maxretry", set80211maxretry), DEF_CMD_ARG("fragthreshold", set80211fragthreshold), DEF_CMD("burst", 1, set80211burst), DEF_CMD("-burst", 0, set80211burst), DEF_CMD_ARG("bmiss", set80211bmissthreshold), DEF_CMD_ARG("bmissthreshold", set80211bmissthreshold), DEF_CMD("shortgi", 1, set80211shortgi), DEF_CMD("-shortgi", 0, set80211shortgi), DEF_CMD("ampdurx", 2, set80211ampdu), DEF_CMD("-ampdurx", -2, set80211ampdu), DEF_CMD("ampdutx", 1, set80211ampdu), DEF_CMD("-ampdutx", -1, set80211ampdu), DEF_CMD("ampdu", 3, set80211ampdu), /* NB: tx+rx */ DEF_CMD("-ampdu", -3, set80211ampdu), DEF_CMD_ARG("ampdulimit", set80211ampdulimit), DEF_CMD_ARG("ampdudensity", set80211ampdudensity), DEF_CMD("amsdurx", 2, set80211amsdu), DEF_CMD("-amsdurx", -2, set80211amsdu), DEF_CMD("amsdutx", 1, set80211amsdu), DEF_CMD("-amsdutx", -1, set80211amsdu), DEF_CMD("amsdu", 3, set80211amsdu), /* NB: tx+rx */ DEF_CMD("-amsdu", -3, set80211amsdu), DEF_CMD_ARG("amsdulimit", set80211amsdulimit), DEF_CMD("stbcrx", 2, set80211stbc), DEF_CMD("-stbcrx", -2, set80211stbc), DEF_CMD("stbctx", 1, set80211stbc), DEF_CMD("-stbctx", -1, set80211stbc), DEF_CMD("stbc", 3, set80211stbc), /* NB: tx+rx */ DEF_CMD("-stbc", -3, set80211stbc), DEF_CMD("ldpcrx", 2, set80211ldpc), DEF_CMD("-ldpcrx", -2, set80211ldpc), DEF_CMD("ldpctx", 1, set80211ldpc), DEF_CMD("-ldpctx", -1, set80211ldpc), DEF_CMD("ldpc", 3, set80211ldpc), /* NB: tx+rx */ DEF_CMD("-ldpc", -3, set80211ldpc), DEF_CMD("puren", 1, set80211puren), DEF_CMD("-puren", 0, set80211puren), DEF_CMD("doth", 1, set80211doth), DEF_CMD("-doth", 0, set80211doth), DEF_CMD("dfs", 1, set80211dfs), DEF_CMD("-dfs", 0, set80211dfs), DEF_CMD("htcompat", 1, set80211htcompat), DEF_CMD("-htcompat", 0, set80211htcompat), DEF_CMD("dwds", 1, set80211dwds), DEF_CMD("-dwds", 0, set80211dwds), DEF_CMD("inact", 1, set80211inact), DEF_CMD("-inact", 0, set80211inact), DEF_CMD("tsn", 1, set80211tsn), DEF_CMD("-tsn", 0, set80211tsn), DEF_CMD_ARG("regdomain", set80211regdomain), DEF_CMD_ARG("country", set80211country), DEF_CMD("indoor", 'I', set80211location), DEF_CMD("-indoor", 'O', set80211location), DEF_CMD("outdoor", 'O', set80211location), DEF_CMD("-outdoor", 'I', set80211location), DEF_CMD("anywhere", ' ', set80211location), DEF_CMD("ecm", 1, set80211ecm), DEF_CMD("-ecm", 0, set80211ecm), DEF_CMD("dotd", 1, set80211dotd), DEF_CMD("-dotd", 0, set80211dotd), DEF_CMD_ARG("htprotmode", set80211htprotmode), DEF_CMD("ht20", 1, set80211htconf), DEF_CMD("-ht20", 0, set80211htconf), DEF_CMD("ht40", 3, set80211htconf), /* NB: 20+40 */ DEF_CMD("-ht40", 0, set80211htconf), DEF_CMD("ht", 3, set80211htconf), /* NB: 20+40 */ DEF_CMD("-ht", 0, set80211htconf), DEF_CMD("vht", 1, set80211vhtconf), DEF_CMD("-vht", 0, set80211vhtconf), DEF_CMD("vht40", 2, set80211vhtconf), DEF_CMD("-vht40", -2, set80211vhtconf), DEF_CMD("vht80", 4, set80211vhtconf), DEF_CMD("-vht80", -4, set80211vhtconf), DEF_CMD("vht80p80", 8, set80211vhtconf), DEF_CMD("-vht80p80", -8, set80211vhtconf), DEF_CMD("vht160", 16, set80211vhtconf), DEF_CMD("-vht160", -16, set80211vhtconf), DEF_CMD("rifs", 1, set80211rifs), DEF_CMD("-rifs", 0, set80211rifs), DEF_CMD("smps", IEEE80211_HTCAP_SMPS_ENA, set80211smps), DEF_CMD("smpsdyn", IEEE80211_HTCAP_SMPS_DYNAMIC, set80211smps), DEF_CMD("-smps", IEEE80211_HTCAP_SMPS_OFF, set80211smps), /* XXX for testing */ DEF_CMD_ARG("chanswitch", set80211chanswitch), DEF_CMD_ARG("tdmaslot", 
set80211tdmaslot), DEF_CMD_ARG("tdmaslotcnt", set80211tdmaslotcnt), DEF_CMD_ARG("tdmaslotlen", set80211tdmaslotlen), DEF_CMD_ARG("tdmabintval", set80211tdmabintval), DEF_CMD_ARG("meshttl", set80211meshttl), DEF_CMD("meshforward", 1, set80211meshforward), DEF_CMD("-meshforward", 0, set80211meshforward), DEF_CMD("meshgate", 1, set80211meshgate), DEF_CMD("-meshgate", 0, set80211meshgate), DEF_CMD("meshpeering", 1, set80211meshpeering), DEF_CMD("-meshpeering", 0, set80211meshpeering), DEF_CMD_ARG("meshmetric", set80211meshmetric), DEF_CMD_ARG("meshpath", set80211meshpath), DEF_CMD("meshrt:flush", IEEE80211_MESH_RTCMD_FLUSH, set80211meshrtcmd), DEF_CMD_ARG("meshrt:add", set80211addmeshrt), DEF_CMD_ARG("meshrt:del", set80211delmeshrt), DEF_CMD_ARG("hwmprootmode", set80211hwmprootmode), DEF_CMD_ARG("hwmpmaxhops", set80211hwmpmaxhops), /* vap cloning support */ DEF_CLONE_CMD_ARG("wlanaddr", set80211clone_wlanaddr), DEF_CLONE_CMD_ARG("wlanbssid", set80211clone_wlanbssid), DEF_CLONE_CMD_ARG("wlandev", set80211clone_wlandev), DEF_CLONE_CMD_ARG("wlanmode", set80211clone_wlanmode), DEF_CLONE_CMD("beacons", 1, set80211clone_beacons), DEF_CLONE_CMD("-beacons", 0, set80211clone_beacons), DEF_CLONE_CMD("bssid", 1, set80211clone_bssid), DEF_CLONE_CMD("-bssid", 0, set80211clone_bssid), DEF_CLONE_CMD("wdslegacy", 1, set80211clone_wdslegacy), DEF_CLONE_CMD("-wdslegacy", 0, set80211clone_wdslegacy), }; static struct afswtch af_ieee80211 = { .af_name = "af_ieee80211", .af_af = AF_UNSPEC, .af_other_status = ieee80211_status, }; static __constructor void ieee80211_ctor(void) { int i; for (i = 0; i < nitems(ieee80211_cmds); i++) cmd_register(&ieee80211_cmds[i]); af_register(&af_ieee80211); clone_setdefcallback("wlan", wlan_create); } Index: projects/clang700-import/sbin/mount_cd9660/mount_cd9660.8 =================================================================== --- projects/clang700-import/sbin/mount_cd9660/mount_cd9660.8 (revision 337615) +++ projects/clang700-import/sbin/mount_cd9660/mount_cd9660.8 (revision 337616) @@ -1,161 +1,162 @@ .\" Copyright (c) 1993, 1994 .\" The Regents of the University of California. All rights reserved. .\" All rights reserved. .\" .\" This code is derived from software donated to Berkeley by .\" Christopher G. Demetriou. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)mount_cd9660.8 8.3 (Berkeley) 3/27/94 .\" $FreeBSD$ .\" -.Dd March 22, 2017 +.Dd August 11, 2018 .Dt MOUNT_CD9660 8 .Os .Sh NAME .Nm mount_cd9660 .Nd mount an ISO-9660 file system .Sh SYNOPSIS .Nm .Op Fl begjrv .Op Fl C Ar charset .Op Fl o Ar options .Op Fl s Ar startsector .Ar special node .Sh DESCRIPTION The .Nm utility attaches the ISO-9660 file system residing on the device .Pa special to the global file system namespace at the location indicated by .Pa node . This command is normally executed by .Xr mount 8 at boot time. .Pp The options are as follows: .Bl -tag -width indent .It Fl b Relax checking for Supplementary Volume Descriptor Flags field which is set to a wrong value on some Joliet formatted disks. .It Fl e Enable the use of extended attributes. .It Fl g Do not strip version numbers on files. (By default, if there are files with different version numbers on the disk, only the last one will be listed.) In either case, files may be opened without explicitly stating a version number. .It Fl j Do not use any Joliet extensions included in the file system. .It Fl o Options are specified with a .Fl o flag followed by a comma separated string of options. See the .Xr mount 8 man page for possible options and their meanings. The following cd9660 specific options are available: .Pp .Bl -tag -width "brokenjoliet" -compact .It Cm extatt Same as .Fl e . .It Cm gens Same as .Fl g . .It Cm nojoliet Same as .Fl j . .It Cm norrip Same as .Fl r . .It Cm brokenjoliet Same as .Fl b . .El .It Fl r Do not use any Rockridge extensions included in the file system. .It Fl s Ar startsector Start the file system at .Ar startsector . Normally, if the underlying device is a CD-ROM drive, .Nm will try to figure out the last track from the CD-ROM containing data, and start the file system there. If the device is not a CD-ROM, or the table of contents cannot be examined, the file system will be started at sector 0. This option can be used to override the behaviour. Note that .Ar startsector is measured in CD-ROM blocks, with 2048 bytes each. This is the same as for example the .Cm info command of .Xr cdcontrol 1 is printing. It is possible to mount an arbitrary session of a multi-session CD by specifying the correct .Ar startsector here. .It Fl C Ar charset Specify local .Ar charset to convert Unicode file names when using Joliet extensions. .It Fl v Be verbose about the starting sector decisions made. .El .Sh EXAMPLES The following command can be used to mount a Kodak Photo-CD: .Pp .Dl "mount_cd9660 -o rw -v -s 0 /dev/cd0 /cdrom" .Sh SEE ALSO .Xr cdcontrol 1 , .Xr mount 2 , .Xr unmount 2 , .Xr cd9660 5 , .Xr fstab 5 , +.Xr mdconfig 8 , .Xr mount 8 .Sh HISTORY The .Nm utility first appeared in .Bx 4.4 . .Pp The Unicode conversion routine was added by .An Ryuichiro Imura Aq Mt imura@ryu16.org in 2003. .Sh BUGS POSIX device node mapping is currently not supported. .Pp Version numbers are not stripped if Rockridge extensions are in use. 
In this case, accessing files that do not have Rockridge names without version numbers gets the one with the lowest version number and not the one with the highest. .Pp There is no ECMA support. Index: projects/clang700-import/sys/amd64/amd64/efirt_machdep.c =================================================================== --- projects/clang700-import/sys/amd64/amd64/efirt_machdep.c (revision 337615) +++ projects/clang700-import/sys/amd64/amd64/efirt_machdep.c (revision 337616) Property changes on: projects/clang700-import/sys/amd64/amd64/efirt_machdep.c ___________________________________________________________________ Deleted: svn:mergeinfo ## -0,110 +0,0 ## Reverse-merged /projects/release-arm-redux/sys/amd64/amd64/efirt_machdep.c:r278203,278595-278597,278610,280643-280650,280652,280655,282539-282546,282548,282553-282557,282564,282566,282570,282573,282587-282593,282596-282607,282615-282616,282624-282629,282631,282633,282635-282640,282642,282647-282648,282653-282654,282656-282657,282659,282662-282667,282682,282691 Reverse-merged /projects/clang380-import/sys/ia64/ia64/efi.c:r293006,293013 Reverse-merged /stable/11/sys/ia64/ia64/efi.c:r303691,304208,304945,304947,304949-304951,305225-305226,305271,305910,305912,305914 Reverse-merged /projects/elftoolchain/sys/amd64/amd64/efirt_machdep.c:r260687-261245 Reverse-merged /user/ngie/release-pkg-fix-tests/sys/amd64/amd64/efirt_machdep.c:r298865-299093 Reverse-merged /user/ngie/more-tests2/sys/amd64/amd64/efirt_machdep.c:r288935-289179,289223-289224,289226-289227,289230,289236,289325,289437,289440,289478,289484-289486,290904,290921 Reverse-merged /vendor/device-tree/dist/sys/amd64/amd64/efirt_machdep.c:r303380 Reverse-merged /projects/clang360-import/sys/amd64/amd64/efirt_machdep.c:r277327-280030 Reverse-merged /projects/zfsd/head/sys/amd64/amd64/efirt.c:r266519,269993 Reverse-merged /projects/netbsd-tests-upstream-01-2017/sys/amd64/amd64/efirt_machdep.c:r312125-313435 Reverse-merged /projects/clang-sparc64/sys/amd64/amd64/efirt_machdep.c:r262258-262612 Reverse-merged /projects/largeSMP/sys/amd64/amd64/efirt_machdep.c:r221273-222812,222815-223757 Reverse-merged /projects/pf/head/sys/ia64/ia64/efi.c:r263908 Reverse-merged /projects/clang360-import/sys/amd64/amd64/efirt.c:r277327-280030 Reverse-merged /projects/netbsd-tests-update-12/sys/amd64/amd64/efirt_machdep.c:r303985-305318 Reverse-merged /projects/collation/sys/amd64/amd64/efirt_machdep.c:r286424-290491 Reverse-merged /projects/elftoolchain/sys/amd64/amd64/efirt.c:r260687-261245 Reverse-merged /projects/lldb-r201577/sys/amd64/amd64/efirt_machdep.c:r262185-262527 Reverse-merged /projects/release-arm64/sys/amd64/amd64/efirt_machdep.c:r281786,281788,281792 Reverse-merged /projects/zfsd/head/sys/ia64/ia64/efi.c:r266519,269993 Reverse-merged /user/ngie/bsnmp_cleanup/sys/amd64/amd64/efirt_machdep.c:r295193 Reverse-merged /user/ngie/more-tests2/sys/amd64/amd64/efirt.c:r288935-289179,289223-289224,289226-289227,289230,289236,289325,289437,289440,289478,289484-289486,290904,290921 Reverse-merged /projects/elftoolchain-update-r3130/sys/amd64/amd64/efirt_machdep.c:r276164,276167,276170-276172 Reverse-merged /projects/release-arm-redux/sys/amd64/amd64/efirt.c:r278203,278595-278597,278610,280643-280650,280652,280655,282539-282546,282548,282553-282557,282564,282566,282570,282573,282587-282593,282596-282607,282615-282616,282624-282629,282631,282633,282635-282640,282642,282647-282648,282653-282654,282656-282657,282659,282662-282667,282682,282691 Reverse-merged 
/user/ngie/more-tests/sys/amd64/amd64/efirt_machdep.c:r281427-281428,281430,281432,281450,281460,281464-281465,281485,281489-281491,281515,281519,281589,281593-281597,281619,284388,288316,288321-288327,288422,288476,288478-288481,288483,288578,288650-288651,288655-288656,288659-288661,288663,288673-288676,288680,288828,288930-288932 Reverse-merged /user/ngie/bug203673/sys/amd64/amd64/efirt_machdep.c:r289470-289489 Reverse-merged /projects/elftoolchain/sys/ia64/ia64/efi.c:r260880 Reverse-merged /projects/mpsutil/sys/ia64/ia64/efi.c:r286179-290100 Reverse-merged /projects/building-blocks/sys/amd64/amd64/efirt_machdep.c:r275142-275143,275198,275297,275306-275307,275309,275311,275556,275558,275600,277445,277670,277673 Reverse-merged /head/sys/ia64/ia64/efi.c:r256280,256291,256293-256294,256299,256302,256304,256308,256321-256323,256325,256327-256328,256330-256331,256333-256335,256338,256341,256343,256345,256347-256348,256350,256362,256365,256385,256389,256391,256423,256425,256430,256440,256446,256448,256450,256459,256467,256470,256477,256489,256492,256498-256502,256504,256510,256512,256514,256533,256537,256539-256544,256546-256549,256551-256553,256555-256557,256561,256570-256571,256581,256594,256603,256606-256607,256610,256612-256614,256628,256637-256638,256640,256642-256643,256645-256647,256650,256657-256658,256661,256666,256670,256672,256678,256680,256682,256687,256689-256692,256694-256695,256705,256707-256711,256713-256718,256720-256722,256724,256735,256743-256746,256748,256750,256752-256753,256760-256771,256773-256779,256782,256788,256790,256792-256793,256798-256801,256803-256804,256806,256808-256809,256812-256818,256822,256826-256828,256830,256832-256833,256835-256836,256838-256839,256842-256843,256845-256848,256855,256859-256862,256864-256865,256868,256870-256871,256873,256875,256878,256880,256885,256887-256889,256893,256895,256898-256901,256911-256912,256914-256915,256919-256920,256925-256926,256931-256932,256934-256943,256945,256948-256951,256953,256955-256956,256959-256961,256963,256966-256969,256971-256975,256977-256978,256994-256995,256999,257005-257007,257015-257018,257029,257036-257038,257051,257054-257055,257057,257059-257062,257064-257066,257069-257072,257075,257077-257079,257084,257092-257093,257095-257100,257105,257109,257111,257114-257118,257127,257129-257130,257132,257138-257139,257142-257152,257155,257157-257159,257161-257162,257164-257165,257167-257172,257175,257178,257180,257182-257183,257186,257189-257191,257193,257195-257203,257206-257207,257209-257210,257214-257217,257221-257222,257228,257230-257231,257233-257235,257240,257248,257251,257258,257265-257266,257268,257272,257274,257276-257282,257287-257288,257291,257293,257295,257297-257300,257302,257304-257308,257315,257329,257332,257334-257338,257340-257345,257347,257349-257350,257359,257361,257364,257368-257370,257376-257384,257388,257390,257393,257399-257400,257402-257403,257407-257411,257413-257414,257416-257419,257421-257423,257429,257435,257440,257447,257452-257454,257469,257472,257475-257478,257480,257482-257487,257489-257490,257501,257504-257505,257511-257512,257515-257516,257518-257519,257532,257534,257539,257541-257543,257549,257555-257557,257561,257574,257582-257583,257592,257594-257595,257598,257600,257603-257604,257619-257620,257631,257633,257637-257639,257641,257643,257647-257650,257654,257656-257657,257660-257661,257667-257670,257672-257673,257676,257678-257680,257686,257694-257695,257701-257702,257705,257712,257729-257730,257732,257734,257736,257738-257740,257743,257745-257749,257751,257754-257757,257767,257
769-257770,257772,257774-257775,257777,257779-257785,257787-257796,257800-257801,257803-257808,257811,257817-257821,257823-257831,257838,257841-257854,257856,257858-257860,257862-257864,257869-257870,257872-257874,257876-257877,257883,257888,257892,257896,257898-257904,257910,257913-257916,257920,257924,257929-257930,257932-257933,257936-257942,257945-257946,257955,257957-257958,257965,257985,257987,257991-257993,257995-257996,258000-258003,258005,258014,258016-258017,258020-258021,258024-258025,258027-258029,258036,258039,258041-258047,258050-258052,258054,258056-258057,258063-258066,258069,258071,258075-258082,258084,258086,258088,258094-258098,258101,258113-258115,258119,258122,258128,258132,258135,258137-258138,258143,258148-258150,258152-258157,258162,258164,258167-258170,258173,258176-258179,258181,258185,258187,258195-258197,258199-258202,258204-258207,258209-258211,258220-258221,258224,258226-258228,258232-258233,258235,258240,258243-258247,258250-258251,258254,258256-258257,258259,258262-258272,258274-258276,258281,258283,258285-258287,258289-258291,258294,258297-258299,258305,258307-258311,258314,258316-258321,258330-258332,258336-258338,258340,258342,258345,258347-258348,258350-258360,258362-258367,258387-258388,258390,258392-258393,258397,258399-258401,258406-258407,258410-258412,258418,258425,258427-258433,258436,258439,258441,258445,258448,258458,258469,258471,258475-258480,258489,258492,258494-258495,258497,258501,258503-258504,258507,258527,258530,258533,258535,258537,258545-258547,258549,258551-258553,258569-258570,258573-258574,258578-258582,258587-258592,258594,258597,258602,258605,258609,258614-258615,258617-258619,258622,258625,258628-258634,258638,258641-258643,258647-258648,258651,258658-258662,258664,258668-258669,258673,258675,258677-258679,258683,258689-258694,258696-258699,258702,258704-258705,258708-258709,258711-258717,258720,258722,258727-258728,258731-258733,258737,258739-258746,258748,258757-258758,258765,258770,258776,258778-258780,258785-258787,258789-258790,258793-258794,258796-258800,258802,258805-258807,258817,258819-258820,258826,258828,258830,258840,258845,258847,258851,258853-258855,258857,258859-258860,258869,258871,258873,258879,258884,258892-258893,258897,258901-258902,258904,258909,258914,258919,258921,258924,258927-258928,258930-258931,258941,258943,258950,258956,259003,259005,259007,259010,259013-259014,259016-259017,259019,259022-259023,259029-259039,259042-259044,259046-259049,259052-259054,259056-259058,259060,259071-259072,259079,259081-259085,259088-259090,259092,259094-259095,259099-259104,259107-259111,259113,259115,259117-259118,259121-259122,259125-259127,259129-259134,259140,259143-259145,259148,259150,259152,259156,259168-259169,259176,259178-259180,259182-259183,259191-259197,259199,259202-259203,259205,259208-259213,259219-259222,259232,259240,259244-259248,259261,259265-259267,259270,259274-259276,259284,259286-259287,259302,259330-259331,259339,259362,259382,259393-259395,259400,259407,259411,259413,259417,259421,259424-259430,259436-259438,259441,259462,259464,259468-259470,259472-259474,259476-259478,259480-259482,259484,259490,259493,259498,259502,259513-259514,259516-259518,259520-259522,259525-259529,259531-259532,259535,259537,259542-259547,259549-259550,259555,259558,259562,259564-259566,259569-259574,259576,259586-259589,259597-259598,259609,259612,259615,259626,259632-259635,259638,259640-259641,259645,259649-259651,259655,259657,259659,259662-259663,259666-259668,259674-259675,259677,259679-259681,259684-259686,259696,25
[svn:mergeinfo property diff elided: a multi-thousand-entry merged revision list, followed by Reverse-merged merge-tracking records for project- and user-branch copies of sys/amd64/amd64/efirt.c, sys/amd64/amd64/efirt_machdep.c, and sys/ia64/ia64/efi.c (e.g. /projects/clang-trunk, /projects/release-pkg, /user/ngie/more-tests), each carrying long revision ranges; the listing is truncated in the source]
Index: projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c =================================================================== --- projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (revision 337615) +++ projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c (revision 337616) @@ -1,8470 +1,8512 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Martin Matuska . All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome * Copyright 2017 Joyent, Inc. * Copyright (c) 2017 Datto Inc. * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. */ /* * SPA: Storage Pool Allocator * * This file contains all the routines used when modifying on-disk SPA state. * This includes opening, importing, destroying, exporting a pool, and syncing a * pool.
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #include #include #endif /* _KERNEL */ #include "zfs_prop.h" #include "zfs_comutil.h" /* Check hostid on import? */ static int check_hostid = 1; /* * The interval, in seconds, at which failed configuration cache file writes * should be retried. */ int zfs_ccw_retry_interval = 300; SYSCTL_DECL(_vfs_zfs); SYSCTL_INT(_vfs_zfs, OID_AUTO, check_hostid, CTLFLAG_RWTUN, &check_hostid, 0, "Check hostid on import?"); TUNABLE_INT("vfs.zfs.ccw_retry_interval", &zfs_ccw_retry_interval); SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RW, &zfs_ccw_retry_interval, 0, "Configuration cache file write, retry after failure, interval (seconds)"); typedef enum zti_modes { ZTI_MODE_FIXED, /* value is # of threads (min 1) */ ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ ZTI_MODE_NULL, /* don't create a taskq */ ZTI_NMODES } zti_modes_t; #define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) } #define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 } #define ZTI_NULL { ZTI_MODE_NULL, 0, 0 } #define ZTI_N(n) ZTI_P(n, 1) #define ZTI_ONE ZTI_N(1) typedef struct zio_taskq_info { zti_modes_t zti_mode; uint_t zti_value; uint_t zti_count; } zio_taskq_info_t; static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = { "issue", "issue_high", "intr", "intr_high" }; /* * This table defines the taskq settings for each ZFS I/O type. When * initializing a pool, we use this table to create an appropriately sized * taskq. Some operations are low volume and therefore have a small, static * number of threads assigned to their taskqs using the ZTI_N(#) or ZTI_ONE * macros. Other operations process a large amount of data; the ZTI_BATCH * macro causes us to create a taskq oriented for throughput. Some operations * are so high frequency and short-lived that the taskq itself can become a * point of lock contention. The ZTI_P(#, #) macro indicates that we need an * additional degree of parallelism specified by the number of threads per- * taskq and the number of taskqs; when dispatching an event in this case, the * particular taskq is chosen at random. * * The different taskq priorities are to handle the different contexts (issue * and interrupt) and then to reserve threads for ZIO_PRIORITY_NOW I/Os that * need to be handled with minimum delay.
*/ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = { /* ISSUE ISSUE_HIGH INTR INTR_HIGH */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */ { ZTI_N(8), ZTI_NULL, ZTI_P(12, 8), ZTI_NULL }, /* READ */ { ZTI_BATCH, ZTI_N(5), ZTI_N(8), ZTI_N(5) }, /* WRITE */ { ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */ { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */ }; static void spa_sync_version(void *arg, dmu_tx_t *tx); static void spa_sync_props(void *arg, dmu_tx_t *tx); static boolean_t spa_has_active_shared_spare(spa_t *spa); static int spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport); static void spa_vdev_resilver_done(spa_t *spa); uint_t zio_taskq_batch_pct = 75; /* 1 thread per cpu in pset */ #ifdef PSRSET_BIND id_t zio_taskq_psrset_bind = PS_NONE; #endif #ifdef SYSDC boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ uint_t zio_taskq_basedc = 80; /* base duty cycle */ #endif boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ extern int zfs_sync_pass_deferred_free; /* * Report any spa_load_verify errors found, but do not fail spa_load. * This is used by zdb to analyze non-idle pools. */ boolean_t spa_load_verify_dryrun = B_FALSE; /* * This (illegal) pool name is used when temporarily importing a spa_t in order * to get the vdev stats associated with the imported devices. */ #define TRYIMPORT_NAME "$import" /* * For debugging purposes: print out vdev tree during pool import. */ int spa_load_print_vdev_tree = B_FALSE; /* * A non-zero value for zfs_max_missing_tvds means that we allow importing * pools with missing top-level vdevs. This is strictly intended for advanced * pool recovery cases since missing data is almost inevitable. Pools with * missing devices can only be imported read-only for safety reasons, and their * fail-mode will be automatically set to "continue". * * With 1 missing vdev we should be able to import the pool and mount all * datasets. User data that was not modified after the missing device has been * added should be recoverable. This means that snapshots created prior to the * addition of that device should be completely intact. * * With 2 missing vdevs, some datasets may fail to mount since there are * dataset statistics that are stored as regular metadata. Some data might be * recoverable if those vdevs were added recently. * * With 3 or more missing vdevs, the pool is severely damaged and MOS entries * may be missing entirely. Chances of data recovery are very low. Note that * there are also risks of performing an inadvertent rewind as we might be * missing all the vdevs with the latest uberblocks. */ uint64_t zfs_max_missing_tvds = 0; /* * The parameters below are similar to zfs_max_missing_tvds but are only * intended for a preliminary open of the pool with an untrusted config which * might be incomplete or out-dated. * * We are more tolerant for pools opened from a cachefile since we could have * an out-dated cachefile where a device removal was not registered. * We could have set the limit arbitrarily high but in the case where devices * are really missing we would want to return the proper error codes; we chose * SPA_DVAS_PER_BP - 1 so that some copies of the MOS would still be available * and we get a chance to retrieve the trusted config. 
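The ZTI_* table above is easier to see in action in isolation. Below is a minimal standalone C sketch of the same table-driven idea; the mode names, the two-queue layout, and the ncpu/batch_pct values are invented for illustration and are not part of spa.c:

#include <stdio.h>

/* Toy model of the ZTI_* table: mode, threads-per-taskq, taskq count. */
enum mode { MODE_FIXED, MODE_BATCH, MODE_NULL };

struct tq_info {
	enum mode mode;
	unsigned value;		/* threads per taskq (FIXED only) */
	unsigned count;		/* number of taskqs */
};

#define P(n, q)	{ MODE_FIXED, (n), (q) }
#define BATCH	{ MODE_BATCH, 0, 1 }
#define NONE	{ MODE_NULL, 0, 0 }
#define N(n)	P(n, 1)
#define ONE	N(1)

static const char *types[] = { "null", "read", "write", "free" };
static const char *queues[] = { "issue", "intr" };

static const struct tq_info table[4][2] = {
	/* issue	intr */
	{ ONE,		NONE },		/* NULL */
	{ N(8),		P(12, 8) },	/* READ */
	{ BATCH,	N(8) },		/* WRITE */
	{ P(12, 8),	ONE },		/* FREE */
};

int main(void)
{
	unsigned ncpu = 8, batch_pct = 75;	/* stand-in for zio_taskq_batch_pct */

	for (int t = 0; t < 4; t++)
		for (int q = 0; q < 2; q++) {
			const struct tq_info *z = &table[t][q];
			unsigned threads = (z->mode == MODE_BATCH) ?
			    ncpu * batch_pct / 100 : z->value;
			if (z->mode != MODE_NULL)
				printf("%s_%s: %u taskq(s) x %u thread(s)\n",
				    types[t], queues[q], z->count, threads);
		}
	return 0;
}

Keeping the policy in one table, as above, means adding or retuning an I/O type touches data rather than control flow.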
*/ uint64_t zfs_max_missing_tvds_cachefile = SPA_DVAS_PER_BP - 1; /* * In the case where config was assembled by scanning device paths (/dev/dsks * by default) we are less tolerant since all the existing devices should have * been detected and we want spa_load to return the right error codes. */ uint64_t zfs_max_missing_tvds_scan = 0; SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_load_print_vdev_tree, CTLFLAG_RWTUN, &spa_load_print_vdev_tree, 0, "print out vdev tree during pool import"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds, CTLFLAG_RWTUN, &zfs_max_missing_tvds, 0, "allow importing pools with missing top-level vdevs"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile, CTLFLAG_RWTUN, &zfs_max_missing_tvds_cachefile, 0, "allow importing pools with missing top-level vdevs in cache file"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan, CTLFLAG_RWTUN, &zfs_max_missing_tvds_scan, 0, "allow importing pools with missing top-level vdevs during scan"); /* * Debugging aid that pauses spa_sync() towards the end. */ boolean_t zfs_pause_spa_sync = B_FALSE; /* * ========================================================================== * SPA properties routines * ========================================================================== */ /* * Add a (source=src, propname=propval) list to an nvlist. */ static void spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, uint64_t intval, zprop_source_t src) { const char *propname = zpool_prop_to_name(prop); nvlist_t *propval; VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); if (strval != NULL) VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); else VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); nvlist_free(propval); } /* * Get property values from the spa configuration. */ static void spa_prop_get_config(spa_t *spa, nvlist_t **nvp) { vdev_t *rvd = spa->spa_root_vdev; dsl_pool_t *pool = spa->spa_dsl_pool; uint64_t size, alloc, cap, version; zprop_source_t src = ZPROP_SRC_NONE; spa_config_dirent_t *dp; metaslab_class_t *mc = spa_normal_class(spa); ASSERT(MUTEX_HELD(&spa->spa_props_lock)); if (rvd != NULL) { alloc = metaslab_class_get_alloc(spa_normal_class(spa)); size = metaslab_class_get_space(spa_normal_class(spa)); spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src); spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src); spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src); spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL, size - alloc, src); spa_prop_add_list(*nvp, ZPOOL_PROP_CHECKPOINT, NULL, spa->spa_checkpoint_info.sci_dspace, src); spa_prop_add_list(*nvp, ZPOOL_PROP_FRAGMENTATION, NULL, metaslab_class_fragmentation(mc), src); spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, metaslab_class_expandable_space(mc), src); spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL, (spa_mode(spa) == FREAD), src); cap = (size == 0) ? 
0 : (alloc * 100 / size); spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src); spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL, ddt_get_pool_dedup_ratio(spa), src); spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL, rvd->vdev_state, src); version = spa_version(spa); if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) src = ZPROP_SRC_DEFAULT; else src = ZPROP_SRC_LOCAL; spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src); } if (pool != NULL) { /* * The $FREE directory was introduced in SPA_VERSION_DEADLISTS, * when opening pools before this version freedir will be NULL. */ if (pool->dp_free_dir != NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, dsl_dir_phys(pool->dp_free_dir)->dd_used_bytes, src); } else { spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL, 0, src); } if (pool->dp_leak_dir != NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, dsl_dir_phys(pool->dp_leak_dir)->dd_used_bytes, src); } else { spa_prop_add_list(*nvp, ZPOOL_PROP_LEAKED, NULL, 0, src); } } spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src); if (spa->spa_comment != NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment, 0, ZPROP_SRC_LOCAL); } if (spa->spa_root != NULL) spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root, 0, ZPROP_SRC_LOCAL); if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) { spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE); } else { spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL, SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE); } if ((dp = list_head(&spa->spa_config_list)) != NULL) { if (dp->scd_path == NULL) { spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, "none", 0, ZPROP_SRC_LOCAL); } else if (strcmp(dp->scd_path, spa_config_path) != 0) { spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE, dp->scd_path, 0, ZPROP_SRC_LOCAL); } } } /* * Get zpool property values. */ int spa_prop_get(spa_t *spa, nvlist_t **nvp) { objset_t *mos = spa->spa_meta_objset; zap_cursor_t zc; zap_attribute_t za; int err; VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); mutex_enter(&spa->spa_props_lock); /* * Get properties from the spa config. */ spa_prop_get_config(spa, nvp); /* If no pool property object, no more prop to get. */ if (mos == NULL || spa->spa_pool_props_object == 0) { mutex_exit(&spa->spa_props_lock); return (0); } /* * Get properties from the MOS pool property object. 
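The ZPOOL_PROP_CAPACITY value exported above is simply allocated space as an integer percentage of total space, with a guard for the empty-pool case. A tiny standalone check of that arithmetic (the sizes are made-up values):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* cap = (size == 0) ? 0 : (alloc * 100 / size), as in spa_prop_get_config(). */
static uint64_t capacity_pct(uint64_t alloc, uint64_t size)
{
	return (size == 0 ? 0 : alloc * 100 / size);
}

int main(void)
{
	assert(capacity_pct(0, 0) == 0);	/* empty pool: no division */
	assert(capacity_pct(25, 100) == 25);
	assert(capacity_pct(999, 1000) == 99);	/* integer math truncates */
	printf("capacity: %ju%%\n", (uintmax_t)capacity_pct(3 << 20, 4 << 20));
	return 0;
}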
*/ for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object); (err = zap_cursor_retrieve(&zc, &za)) == 0; zap_cursor_advance(&zc)) { uint64_t intval = 0; char *strval = NULL; zprop_source_t src = ZPROP_SRC_DEFAULT; zpool_prop_t prop; if ((prop = zpool_name_to_prop(za.za_name)) == ZPOOL_PROP_INVAL) continue; switch (za.za_integer_length) { case 8: /* integer property */ if (za.za_first_integer != zpool_prop_default_numeric(prop)) src = ZPROP_SRC_LOCAL; if (prop == ZPOOL_PROP_BOOTFS) { dsl_pool_t *dp; dsl_dataset_t *ds = NULL; dp = spa_get_dsl(spa); dsl_pool_config_enter(dp, FTAG); err = dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &ds); if (err != 0) { dsl_pool_config_exit(dp, FTAG); break; } strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); dsl_dataset_name(ds, strval); dsl_dataset_rele(ds, FTAG); dsl_pool_config_exit(dp, FTAG); } else { strval = NULL; intval = za.za_first_integer; } spa_prop_add_list(*nvp, prop, strval, intval, src); if (strval != NULL) kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN); break; case 1: /* string property */ strval = kmem_alloc(za.za_num_integers, KM_SLEEP); err = zap_lookup(mos, spa->spa_pool_props_object, za.za_name, 1, za.za_num_integers, strval); if (err) { kmem_free(strval, za.za_num_integers); break; } spa_prop_add_list(*nvp, prop, strval, 0, src); kmem_free(strval, za.za_num_integers); break; default: break; } } zap_cursor_fini(&zc); mutex_exit(&spa->spa_props_lock); out: if (err && err != ENOENT) { nvlist_free(*nvp); *nvp = NULL; return (err); } return (0); } /* * Validate the given pool properties nvlist and modify the list * for the property values to be set. */ static int spa_prop_validate(spa_t *spa, nvlist_t *props) { nvpair_t *elem; int error = 0, reset_bootfs = 0; uint64_t objnum = 0; boolean_t has_feature = B_FALSE; elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { uint64_t intval; char *strval, *slash, *check, *fname; const char *propname = nvpair_name(elem); zpool_prop_t prop = zpool_name_to_prop(propname); switch (prop) { case ZPOOL_PROP_INVAL: if (!zpool_prop_feature(propname)) { error = SET_ERROR(EINVAL); break; } /* * Sanitize the input. */ if (nvpair_type(elem) != DATA_TYPE_UINT64) { error = SET_ERROR(EINVAL); break; } if (nvpair_value_uint64(elem, &intval) != 0) { error = SET_ERROR(EINVAL); break; } if (intval != 0) { error = SET_ERROR(EINVAL); break; } fname = strchr(propname, '@') + 1; if (zfeature_lookup_name(fname, NULL) != 0) { error = SET_ERROR(EINVAL); break; } has_feature = B_TRUE; break; case ZPOOL_PROP_VERSION: error = nvpair_value_uint64(elem, &intval); if (!error && (intval < spa_version(spa) || intval > SPA_VERSION_BEFORE_FEATURES || has_feature)) error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_DELEGATION: case ZPOOL_PROP_AUTOREPLACE: case ZPOOL_PROP_LISTSNAPS: case ZPOOL_PROP_AUTOEXPAND: error = nvpair_value_uint64(elem, &intval); if (!error && intval > 1) error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_BOOTFS: /* * If the pool version is less than SPA_VERSION_BOOTFS, * or the pool is still being created (version == 0), * the bootfs property cannot be set. 
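The pool-property walk above follows the usual ZAP cursor shape: init, retrieve until exhaustion, advance, fini. The standalone sketch below models that iteration pattern with a toy cursor over an in-memory array; the types and return convention are invented stand-ins, not the real kernel API:

#include <stdio.h>
#include <string.h>

/* Toy stand-ins for a ZAP object and its cursor. */
struct attr { const char *name; unsigned long long first_integer; };
struct cursor { const struct attr *obj; size_t len, pos; };

static void cursor_init(struct cursor *zc, const struct attr *obj, size_t len)
{ zc->obj = obj; zc->len = len; zc->pos = 0; }

static int cursor_retrieve(struct cursor *zc, struct attr *za)
{
	if (zc->pos >= zc->len)
		return (-1);	/* models ENOENT at the end of the object */
	*za = zc->obj[zc->pos];
	return (0);
}

static void cursor_advance(struct cursor *zc) { zc->pos++; }

int main(void)
{
	const struct attr props[] = {
		{ "failmode", 0 }, { "autoreplace", 1 }, { "comment", 0 },
	};
	struct cursor zc;
	struct attr za;

	/* Same init/retrieve/advance loop shape as the pool-props walk above. */
	for (cursor_init(&zc, props, 3); cursor_retrieve(&zc, &za) == 0;
	    cursor_advance(&zc)) {
		if (strcmp(za.name, "comment") == 0)
			continue;	/* skip entries we don't handle */
		printf("%s = %llu\n", za.name, za.first_integer);
	}
	return 0;
}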
*/ if (spa_version(spa) < SPA_VERSION_BOOTFS) { error = SET_ERROR(ENOTSUP); break; } /* * Make sure the vdev config is bootable */ if (!vdev_is_bootable(spa->spa_root_vdev)) { error = SET_ERROR(ENOTSUP); break; } reset_bootfs = 1; error = nvpair_value_string(elem, &strval); if (!error) { objset_t *os; uint64_t propval; if (strval == NULL || strval[0] == '\0') { objnum = zpool_prop_default_numeric( ZPOOL_PROP_BOOTFS); break; } error = dmu_objset_hold(strval, FTAG, &os); if (error != 0) break; /* * Must be ZPL, and its property settings * must be supported by GRUB (compression * is not gzip, and large blocks are not used). */ if (dmu_objset_type(os) != DMU_OST_ZFS) { error = SET_ERROR(ENOTSUP); } else if ((error = dsl_prop_get_int_ds(dmu_objset_ds(os), zfs_prop_to_name(ZFS_PROP_COMPRESSION), &propval)) == 0 && !BOOTFS_COMPRESS_VALID(propval)) { error = SET_ERROR(ENOTSUP); } else { objnum = dmu_objset_id(os); } dmu_objset_rele(os, FTAG); } break; case ZPOOL_PROP_FAILUREMODE: error = nvpair_value_uint64(elem, &intval); if (!error && (intval < ZIO_FAILURE_MODE_WAIT || intval > ZIO_FAILURE_MODE_PANIC)) error = SET_ERROR(EINVAL); /* * This is a special case which only occurs when * the pool has completely failed. This allows * the user to change the in-core failmode property * without syncing it out to disk (I/Os might * currently be blocked). We do this by returning * EIO to the caller (spa_prop_set) to trick it * into thinking we encountered a property validation * error. */ if (!error && spa_suspended(spa)) { spa->spa_failmode = intval; error = SET_ERROR(EIO); } break; case ZPOOL_PROP_CACHEFILE: if ((error = nvpair_value_string(elem, &strval)) != 0) break; if (strval[0] == '\0') break; if (strcmp(strval, "none") == 0) break; if (strval[0] != '/') { error = SET_ERROR(EINVAL); break; } slash = strrchr(strval, '/'); ASSERT(slash != NULL); if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || strcmp(slash, "/..") == 0) error = SET_ERROR(EINVAL); break; case ZPOOL_PROP_COMMENT: if ((error = nvpair_value_string(elem, &strval)) != 0) break; for (check = strval; *check != '\0'; check++) { /* * The kernel doesn't have an easy isprint() * check. For this kernel check, we merely * check ASCII apart from DEL. Fix this if * there is an easy-to-use kernel isprint(). 
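The ZPOOL_PROP_CACHEFILE checks above are self-contained enough to lift into a userspace sketch: the empty string and "none" pass through, and anything else must be an absolute path whose final component is not empty, "." or "..". A minimal model (the return values are stand-ins for 0/EINVAL):

#include <stdio.h>
#include <string.h>

/*
 * Mirror of the cachefile validation above: "" and "none" pass through,
 * anything else must be absolute and must not end in "/", "/." or "/..".
 */
static int cachefile_ok(const char *strval)
{
	const char *slash;

	if (strval[0] == '\0' || strcmp(strval, "none") == 0)
		return (0);
	if (strval[0] != '/')
		return (-1);
	slash = strrchr(strval, '/');	/* cannot be NULL: strval[0] == '/' */
	if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
	    strcmp(slash, "/..") == 0)
		return (-1);
	return (0);
}

int main(void)
{
	const char *cases[] = { "", "none", "/etc/zfs/zpool.cache",
	    "relative/path", "/tmp/", "/tmp/." };
	for (size_t i = 0; i < sizeof (cases) / sizeof (cases[0]); i++)
		printf("%-22s -> %s\n", cases[i],
		    cachefile_ok(cases[i]) == 0 ? "ok" : "EINVAL");
	return 0;
}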
*/ if (*check >= 0x7f) { error = SET_ERROR(EINVAL); break; } } if (strlen(strval) > ZPROP_MAX_COMMENT) error = E2BIG; break; case ZPOOL_PROP_DEDUPDITTO: if (spa_version(spa) < SPA_VERSION_DEDUP) error = SET_ERROR(ENOTSUP); else error = nvpair_value_uint64(elem, &intval); if (error == 0 && intval != 0 && intval < ZIO_DEDUPDITTO_MIN) error = SET_ERROR(EINVAL); break; } if (error) break; } if (!error && reset_bootfs) { error = nvlist_remove(props, zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING); if (!error) { error = nvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum); } } return (error); } void spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) { char *cachefile; spa_config_dirent_t *dp; if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), &cachefile) != 0) return; dp = kmem_alloc(sizeof (spa_config_dirent_t), KM_SLEEP); if (cachefile[0] == '\0') dp->scd_path = spa_strdup(spa_config_path); else if (strcmp(cachefile, "none") == 0) dp->scd_path = NULL; else dp->scd_path = spa_strdup(cachefile); list_insert_head(&spa->spa_config_list, dp); if (need_sync) spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE); } int spa_prop_set(spa_t *spa, nvlist_t *nvp) { int error; nvpair_t *elem = NULL; boolean_t need_sync = B_FALSE; if ((error = spa_prop_validate(spa, nvp)) != 0) return (error); while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) { zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem)); if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT || prop == ZPOOL_PROP_READONLY) continue; if (prop == ZPOOL_PROP_VERSION || prop == ZPOOL_PROP_INVAL) { uint64_t ver; if (prop == ZPOOL_PROP_VERSION) { VERIFY(nvpair_value_uint64(elem, &ver) == 0); } else { ASSERT(zpool_prop_feature(nvpair_name(elem))); ver = SPA_VERSION_FEATURES; need_sync = B_TRUE; } /* Save time if the version is already set. */ if (ver == spa_version(spa)) continue; /* * In addition to the pool directory object, we might * create the pool properties object, the features for * read object, the features for write object, or the * feature descriptions object. */ error = dsl_sync_task(spa->spa_name, NULL, spa_sync_version, &ver, 6, ZFS_SPACE_CHECK_RESERVED); if (error) return (error); continue; } need_sync = B_TRUE; break; } if (need_sync) { return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props, nvp, 6, ZFS_SPACE_CHECK_RESERVED)); } return (0); } /* * If the bootfs property value is dsobj, clear it. */ void spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx) { if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) { VERIFY(zap_remove(spa->spa_meta_objset, spa->spa_pool_props_object, zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0); spa->spa_bootfs = 0; } } /*ARGSUSED*/ static int spa_change_guid_check(void *arg, dmu_tx_t *tx) { uint64_t *newguid = arg; spa_t *spa = dmu_tx_pool(tx)->dp_spa; vdev_t *rvd = spa->spa_root_vdev; uint64_t vdev_state; if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { int error = (spa_has_checkpoint(spa)) ? 
ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; return (SET_ERROR(error)); } spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); vdev_state = rvd->vdev_state; spa_config_exit(spa, SCL_STATE, FTAG); if (vdev_state != VDEV_STATE_HEALTHY) return (SET_ERROR(ENXIO)); ASSERT3U(spa_guid(spa), !=, *newguid); return (0); } static void spa_change_guid_sync(void *arg, dmu_tx_t *tx) { uint64_t *newguid = arg; spa_t *spa = dmu_tx_pool(tx)->dp_spa; uint64_t oldguid; vdev_t *rvd = spa->spa_root_vdev; oldguid = spa_guid(spa); spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); rvd->vdev_guid = *newguid; rvd->vdev_guid_sum += (*newguid - oldguid); vdev_config_dirty(rvd); spa_config_exit(spa, SCL_STATE, FTAG); spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu", oldguid, *newguid); } /* * Change the GUID for the pool. This is done so that we can later * re-import a pool built from a clone of our own vdevs. We will modify * the root vdev's guid, our own pool guid, and then mark all of our * vdevs dirty. Note that we must make sure that all our vdevs are * online when we do this, or else any vdevs that weren't present * would be orphaned from our pool. We are also going to issue a * sysevent to update any watchers. */ int spa_change_guid(spa_t *spa) { int error; uint64_t guid; mutex_enter(&spa->spa_vdev_top_lock); mutex_enter(&spa_namespace_lock); guid = spa_generate_guid(NULL); error = dsl_sync_task(spa->spa_name, spa_change_guid_check, spa_change_guid_sync, &guid, 5, ZFS_SPACE_CHECK_RESERVED); if (error == 0) { spa_write_cachefile(spa, B_FALSE, B_TRUE); spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID); } mutex_exit(&spa_namespace_lock); mutex_exit(&spa->spa_vdev_top_lock); return (error); } /* * ========================================================================== * SPA state manipulation (open/create/destroy/import/export) * ========================================================================== */ static int spa_error_entry_compare(const void *a, const void *b) { const spa_error_entry_t *sa = (const spa_error_entry_t *)a; const spa_error_entry_t *sb = (const spa_error_entry_t *)b; int ret; ret = memcmp(&sa->se_bookmark, &sb->se_bookmark, sizeof (zbookmark_phys_t)); return (AVL_ISIGN(ret)); } /* * Utility function which retrieves copies of the current logs and * re-initializes them in the process. 
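spa_error_entry_compare() above reduces memcmp()'s arbitrary result to -1/0/+1 because AVL comparators must return exactly those values; that is what AVL_ISIGN does. A standalone sketch of the same normalization, with invented struct names:

#include <stdio.h>
#include <string.h>

/* Normalize any comparison result to -1/0/+1, like AVL_ISIGN above. */
static int isign(int x)
{
	return ((x > 0) - (x < 0));
}

/* Shape of spa_error_entry_compare(): memcmp over an embedded key. */
struct entry { unsigned char bookmark[16]; };

static int entry_compare(const void *a, const void *b)
{
	const struct entry *sa = a, *sb = b;

	return (isign(memcmp(sa->bookmark, sb->bookmark,
	    sizeof (sa->bookmark))));
}

int main(void)
{
	struct entry x = { { 1 } }, y = { { 2 } };

	/* Tree insertion only needs the sign, not memcmp's raw magnitude. */
	printf("x vs y: %d, y vs x: %d, x vs x: %d\n",
	    entry_compare(&x, &y), entry_compare(&y, &x),
	    entry_compare(&x, &x));
	return 0;
}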
*/ void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub) { ASSERT(MUTEX_HELD(&spa->spa_errlist_lock)); bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t)); bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t)); avl_create(&spa->spa_errlist_scrub, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); avl_create(&spa->spa_errlist_last, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); } static void spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) { const zio_taskq_info_t *ztip = &zio_taskqs[t][q]; enum zti_modes mode = ztip->zti_mode; uint_t value = ztip->zti_value; uint_t count = ztip->zti_count; spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; char name[32]; uint_t flags = 0; boolean_t batch = B_FALSE; if (mode == ZTI_MODE_NULL) { tqs->stqs_count = 0; tqs->stqs_taskq = NULL; return; } ASSERT3U(count, >, 0); tqs->stqs_count = count; tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); switch (mode) { case ZTI_MODE_FIXED: ASSERT3U(value, >=, 1); value = MAX(value, 1); break; case ZTI_MODE_BATCH: batch = B_TRUE; flags |= TASKQ_THREADS_CPU_PCT; value = zio_taskq_batch_pct; break; default: panic("unrecognized mode for %s_%s taskq (%u:%u) in " "spa_activate()", zio_type_name[t], zio_taskq_types[q], mode, value); break; } for (uint_t i = 0; i < count; i++) { taskq_t *tq; if (count > 1) { (void) snprintf(name, sizeof (name), "%s_%s_%u", zio_type_name[t], zio_taskq_types[q], i); } else { (void) snprintf(name, sizeof (name), "%s_%s", zio_type_name[t], zio_taskq_types[q]); } #ifdef SYSDC if (zio_taskq_sysdc && spa->spa_proc != &p0) { if (batch) flags |= TASKQ_DC_BATCH; tq = taskq_create_sysdc(name, value, 50, INT_MAX, spa->spa_proc, zio_taskq_basedc, flags); } else { #endif pri_t pri = maxclsyspri; /* * The write issue taskq can be extremely CPU * intensive. Run it at slightly lower priority * than the other taskqs. * FreeBSD notes: * - numerically higher priorities are lower priorities; * - if priorities divided by four (RQ_PPQ) are equal * then a difference between them is insignificant. */ if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) #ifdef illumos pri--; #else pri += 4; #endif tq = taskq_create_proc(name, value, pri, 50, INT_MAX, spa->spa_proc, flags); #ifdef SYSDC } #endif tqs->stqs_taskq[i] = tq; } } static void spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q) { spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; if (tqs->stqs_taskq == NULL) { ASSERT0(tqs->stqs_count); return; } for (uint_t i = 0; i < tqs->stqs_count; i++) { ASSERT3P(tqs->stqs_taskq[i], !=, NULL); taskq_destroy(tqs->stqs_taskq[i]); } kmem_free(tqs->stqs_taskq, tqs->stqs_count * sizeof (taskq_t *)); tqs->stqs_taskq = NULL; } /* * Dispatch a task to the appropriate taskq for the ZFS I/O type and priority. * Note that a type may have multiple discrete taskqs to avoid lock contention * on the taskq itself. In that case we choose which taskq at random by using * the low bits of gethrtime(). 
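spa_taskqs_init() above derives each taskq's name from the I/O type and queue type, appending an index only when a type/queue pair gets more than one taskq. A trivial standalone sketch of that naming scheme:

#include <stdio.h>

/* Name taskqs the way spa_taskqs_init() does: suffix only when count > 1. */
static void name_taskqs(const char *type, const char *queue, unsigned count)
{
	char name[32];

	for (unsigned i = 0; i < count; i++) {
		if (count > 1)
			(void) snprintf(name, sizeof (name), "%s_%s_%u",
			    type, queue, i);
		else
			(void) snprintf(name, sizeof (name), "%s_%s",
			    type, queue);
		printf("created %s\n", name);
	}
}

int main(void)
{
	name_taskqs("write", "issue", 1);	/* write_issue */
	name_taskqs("read", "intr", 3);		/* read_intr_0 .. read_intr_2 */
	return 0;
}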
*/ void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent) { spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; taskq_t *tq; ASSERT3P(tqs->stqs_taskq, !=, NULL); ASSERT3U(tqs->stqs_count, !=, 0); if (tqs->stqs_count == 1) { tq = tqs->stqs_taskq[0]; } else { #ifdef _KERNEL tq = tqs->stqs_taskq[cpu_ticks() % tqs->stqs_count]; #else tq = tqs->stqs_taskq[gethrtime() % tqs->stqs_count]; #endif } taskq_dispatch_ent(tq, func, arg, flags, ent); } static void spa_create_zio_taskqs(spa_t *spa) { for (int t = 0; t < ZIO_TYPES; t++) { for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { spa_taskqs_init(spa, t, q); } } } #ifdef _KERNEL #ifdef SPA_PROCESS static void spa_thread(void *arg) { callb_cpr_t cprinfo; spa_t *spa = arg; user_t *pu = PTOU(curproc); CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr, spa->spa_name); ASSERT(curproc != &p0); (void) snprintf(pu->u_psargs, sizeof (pu->u_psargs), "zpool-%s", spa->spa_name); (void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm)); #ifdef PSRSET_BIND /* bind this thread to the requested psrset */ if (zio_taskq_psrset_bind != PS_NONE) { pool_lock(); mutex_enter(&cpu_lock); mutex_enter(&pidlock); mutex_enter(&curproc->p_lock); if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind, 0, NULL, NULL) == 0) { curthread->t_bind_pset = zio_taskq_psrset_bind; } else { cmn_err(CE_WARN, "Couldn't bind process for zfs pool \"%s\" to " "pset %d\n", spa->spa_name, zio_taskq_psrset_bind); } mutex_exit(&curproc->p_lock); mutex_exit(&pidlock); mutex_exit(&cpu_lock); pool_unlock(); } #endif #ifdef SYSDC if (zio_taskq_sysdc) { sysdc_thread_enter(curthread, 100, 0); } #endif spa->spa_proc = curproc; spa->spa_did = curthread->t_did; spa_create_zio_taskqs(spa); mutex_enter(&spa->spa_proc_lock); ASSERT(spa->spa_proc_state == SPA_PROC_CREATED); spa->spa_proc_state = SPA_PROC_ACTIVE; cv_broadcast(&spa->spa_proc_cv); CALLB_CPR_SAFE_BEGIN(&cprinfo); while (spa->spa_proc_state == SPA_PROC_ACTIVE) cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock); ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE); spa->spa_proc_state = SPA_PROC_GONE; spa->spa_proc = &p0; cv_broadcast(&spa->spa_proc_cv); CALLB_CPR_EXIT(&cprinfo); /* drops spa_proc_lock */ mutex_enter(&curproc->p_lock); lwp_exit(); } #endif /* SPA_PROCESS */ #endif /* * Activate an uninitialized pool. */ static void spa_activate(spa_t *spa, int mode) { ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); spa->spa_state = POOL_STATE_ACTIVE; spa->spa_mode = mode; spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops); spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops); /* Try to create a covering process */ mutex_enter(&spa->spa_proc_lock); ASSERT(spa->spa_proc_state == SPA_PROC_NONE); ASSERT(spa->spa_proc == &p0); spa->spa_did = 0; #ifdef SPA_PROCESS /* Only create a process if we're going to be around a while. 
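spa_taskq_dispatch_ent() above avoids the clock read and modulo in the common single-taskq case, and otherwise picks a queue from the low bits of a fast clock; rough spreading is all that is needed to relieve lock contention. A userspace approximation using clock_gettime() in place of gethrtime() (the queue and iteration counts are arbitrary):

#include <stdio.h>
#include <time.h>

/* Pick one of 'count' queues from the low bits of a fast clock. */
static unsigned pick_queue(unsigned count)
{
	struct timespec ts;

	if (count == 1)
		return (0);	/* common case: skip the clock read */
	(void) clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((unsigned)ts.tv_nsec % count);
}

int main(void)
{
	unsigned hits[4] = { 0 };

	/* Repeated dispatches should spread across the queues over time. */
	for (int i = 0; i < 10000; i++)
		hits[pick_queue(4)]++;
	for (int q = 0; q < 4; q++)
		printf("queue %d: %u dispatches\n", q, hits[q]);
	return 0;
}

Uniformity is not required here; any cheap spread across the queues defuses the contention the comment describes.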
*/ if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) { if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri, NULL, 0) == 0) { spa->spa_proc_state = SPA_PROC_CREATED; while (spa->spa_proc_state == SPA_PROC_CREATED) { cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); } ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); ASSERT(spa->spa_proc != &p0); ASSERT(spa->spa_did != 0); } else { #ifdef _KERNEL cmn_err(CE_WARN, "Couldn't create process for zfs pool \"%s\"\n", spa->spa_name); #endif } } #endif /* SPA_PROCESS */ mutex_exit(&spa->spa_proc_lock); /* If we didn't create a process, we need to create our taskqs. */ ASSERT(spa->spa_proc == &p0); if (spa->spa_proc == &p0) { spa_create_zio_taskqs(spa); } /* * Start TRIM thread. */ trim_thread_create(spa); for (size_t i = 0; i < TXG_SIZE; i++) { spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); } list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), offsetof(vdev_t, vdev_config_dirty_node)); list_create(&spa->spa_evicting_os_list, sizeof (objset_t), offsetof(objset_t, os_evicting_node)); list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), offsetof(vdev_t, vdev_state_dirty_node)); txg_list_create(&spa->spa_vdev_txg_list, spa, offsetof(struct vdev, vdev_txg_node)); avl_create(&spa->spa_errlist_scrub, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); avl_create(&spa->spa_errlist_last, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); } /* * Opposite of spa_activate(). */ static void spa_deactivate(spa_t *spa) { ASSERT(spa->spa_sync_on == B_FALSE); ASSERT(spa->spa_dsl_pool == NULL); ASSERT(spa->spa_root_vdev == NULL); ASSERT(spa->spa_async_zio_root == NULL); ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); /* * Stop TRIM thread in case spa_unload() wasn't called directly * before spa_deactivate(). */ trim_thread_destroy(spa); spa_evicting_os_wait(spa); txg_list_destroy(&spa->spa_vdev_txg_list); list_destroy(&spa->spa_config_dirty_list); list_destroy(&spa->spa_evicting_os_list); list_destroy(&spa->spa_state_dirty_list); for (int t = 0; t < ZIO_TYPES; t++) { for (int q = 0; q < ZIO_TASKQ_TYPES; q++) { spa_taskqs_fini(spa, t, q); } } for (size_t i = 0; i < TXG_SIZE; i++) { ASSERT3P(spa->spa_txg_zio[i], !=, NULL); VERIFY0(zio_wait(spa->spa_txg_zio[i])); spa->spa_txg_zio[i] = NULL; } metaslab_class_destroy(spa->spa_normal_class); spa->spa_normal_class = NULL; metaslab_class_destroy(spa->spa_log_class); spa->spa_log_class = NULL; /* * If this was part of an import or the open otherwise failed, we may * still have errors left in the queues. Empty them just in case. */ spa_errlog_drain(spa); avl_destroy(&spa->spa_errlist_scrub); avl_destroy(&spa->spa_errlist_last); spa->spa_state = POOL_STATE_UNINITIALIZED; mutex_enter(&spa->spa_proc_lock); if (spa->spa_proc_state != SPA_PROC_NONE) { ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE); spa->spa_proc_state = SPA_PROC_DEACTIVATE; cv_broadcast(&spa->spa_proc_cv); while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) { ASSERT(spa->spa_proc != &p0); cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock); } ASSERT(spa->spa_proc_state == SPA_PROC_GONE); spa->spa_proc_state = SPA_PROC_NONE; } ASSERT(spa->spa_proc == &p0); mutex_exit(&spa->spa_proc_lock); #ifdef SPA_PROCESS /* * We want to make sure spa_thread() has actually exited the ZFS * module, so that the module can't be unloaded out from underneath * it. 
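The list_create()/avl_create() calls above all pass offsetof() of a node field embedded in the element, the classic intrusive-container idiom: the link lives inside the object, and the container recovers the object by subtracting the offset. A minimal standalone illustration with invented types:

#include <stddef.h>
#include <stdio.h>

/*
 * Minimal intrusive list: the node lives inside the element, as with
 * list_create(..., offsetof(vdev_t, vdev_config_dirty_node)) above.
 */
struct node { struct node *next; };

struct item {
	int id;
	struct node link;	/* embedded; no separate allocation needed */
};

/* Recover the containing item from a pointer to its embedded node. */
static struct item *node_to_item(struct node *n, size_t off)
{
	return ((struct item *)((char *)n - off));
}

int main(void)
{
	struct item a = { 1, { NULL } }, b = { 2, { NULL } };
	struct node *head = &a.link;
	size_t off = offsetof(struct item, link);

	a.link.next = &b.link;
	for (struct node *n = head; n != NULL; n = n->next)
		printf("item %d\n", node_to_item(n, off)->id);
	return 0;
}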
*/ if (spa->spa_did != 0) { thread_join(spa->spa_did); spa->spa_did = 0; } #endif /* SPA_PROCESS */ } /* * Verify a pool configuration, and construct the vdev tree appropriately. This * will create all the necessary vdevs in the appropriate layout, with each vdev * in the CLOSED state. This will prep the pool before open/creation/import. * All vdev validation is done by the vdev_alloc() routine. */ static int spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, int atype) { nvlist_t **child; uint_t children; int error; if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0) return (error); if ((*vdp)->vdev_ops->vdev_op_leaf) return (0); error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children); if (error == ENOENT) return (0); if (error) { vdev_free(*vdp); *vdp = NULL; return (SET_ERROR(EINVAL)); } for (int c = 0; c < children; c++) { vdev_t *vd; if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c, atype)) != 0) { vdev_free(*vdp); *vdp = NULL; return (error); } } ASSERT(*vdp != NULL); return (0); } /* * Opposite of spa_load(). */ static void spa_unload(spa_t *spa) { int i; ASSERT(MUTEX_HELD(&spa_namespace_lock)); spa_load_note(spa, "UNLOADING"); /* * Stop TRIM thread. */ trim_thread_destroy(spa); /* * Stop async tasks. */ spa_async_suspend(spa); if (spa->spa_root_vdev) { vdev_initialize_stop_all(spa->spa_root_vdev, VDEV_INITIALIZE_ACTIVE); } /* * Stop syncing. */ if (spa->spa_sync_on) { txg_sync_stop(spa->spa_dsl_pool); spa->spa_sync_on = B_FALSE; } /* * Even though vdev_free() also calls vdev_metaslab_fini, we need * to call it earlier, before we wait for async i/o to complete. * This ensures that there is no async metaslab prefetching, by * calling taskq_wait(mg_taskq). */ if (spa->spa_root_vdev != NULL) { spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++) vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]); spa_config_exit(spa, SCL_ALL, spa); } /* * Wait for any outstanding async I/O to complete. */ if (spa->spa_async_zio_root != NULL) { for (int i = 0; i < max_ncpus; i++) (void) zio_wait(spa->spa_async_zio_root[i]); kmem_free(spa->spa_async_zio_root, max_ncpus * sizeof (void *)); spa->spa_async_zio_root = NULL; } if (spa->spa_vdev_removal != NULL) { spa_vdev_removal_destroy(spa->spa_vdev_removal); spa->spa_vdev_removal = NULL; } if (spa->spa_condense_zthr != NULL) { ASSERT(!zthr_isrunning(spa->spa_condense_zthr)); zthr_destroy(spa->spa_condense_zthr); spa->spa_condense_zthr = NULL; } if (spa->spa_checkpoint_discard_zthr != NULL) { ASSERT(!zthr_isrunning(spa->spa_checkpoint_discard_zthr)); zthr_destroy(spa->spa_checkpoint_discard_zthr); spa->spa_checkpoint_discard_zthr = NULL; } spa_condense_fini(spa); bpobj_close(&spa->spa_deferred_bpobj); spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); /* * Close all vdevs. */ if (spa->spa_root_vdev) vdev_free(spa->spa_root_vdev); ASSERT(spa->spa_root_vdev == NULL); /* * Close the dsl pool. 
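spa_config_parse() above builds the vdev tree recursively: allocate the current vdev, stop at leaves, otherwise recurse over the children and unwind everything allocated so far if any child fails. The standalone sketch below mirrors that alloc/recurse/unwind shape over a toy config structure (all names are invented):

#include <stdio.h>
#include <stdlib.h>

/* Toy "config": a node is a leaf if it has no children. */
struct conf { int id; struct conf *child; int nchild; };
struct vdev { int id; struct vdev **child; int nchild; };

static void vfree(struct vdev *v)
{
	for (int c = 0; c < v->nchild; c++)
		if (v->child[c] != NULL)
			vfree(v->child[c]);
	free(v->child);
	free(v);
}

/*
 * Same shape as spa_config_parse(): alloc self, return if leaf,
 * otherwise recurse and tear down the partial tree on any error.
 */
static int parse(struct vdev **vdp, const struct conf *nv)
{
	struct vdev *v = calloc(1, sizeof (*v));

	if (v == NULL)
		return (-1);
	v->id = nv->id;
	*vdp = v;
	if (nv->nchild == 0)
		return (0);		/* leaf vdev */
	v->nchild = nv->nchild;
	v->child = calloc(nv->nchild, sizeof (struct vdev *));
	if (v->child == NULL) {
		vfree(v); *vdp = NULL; return (-1);
	}
	for (int c = 0; c < nv->nchild; c++)
		if (parse(&v->child[c], &nv->child[c]) != 0) {
			vfree(v); *vdp = NULL; return (-1);
		}
	return (0);
}

int main(void)
{
	struct conf leaves[2] = { { 2, NULL, 0 }, { 3, NULL, 0 } };
	struct conf root = { 1, leaves, 2 };
	struct vdev *tree = NULL;

	if (parse(&tree, &root) == 0) {
		printf("built root %d with %d children\n",
		    tree->id, tree->nchild);
		vfree(tree);
	}
	return 0;
}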
*/ if (spa->spa_dsl_pool) { dsl_pool_close(spa->spa_dsl_pool); spa->spa_dsl_pool = NULL; spa->spa_meta_objset = NULL; } ddt_unload(spa); /* * Drop and purge level 2 cache */ spa_l2cache_drop(spa); for (i = 0; i < spa->spa_spares.sav_count; i++) vdev_free(spa->spa_spares.sav_vdevs[i]); if (spa->spa_spares.sav_vdevs) { kmem_free(spa->spa_spares.sav_vdevs, spa->spa_spares.sav_count * sizeof (void *)); spa->spa_spares.sav_vdevs = NULL; } if (spa->spa_spares.sav_config) { nvlist_free(spa->spa_spares.sav_config); spa->spa_spares.sav_config = NULL; } spa->spa_spares.sav_count = 0; for (i = 0; i < spa->spa_l2cache.sav_count; i++) { vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); vdev_free(spa->spa_l2cache.sav_vdevs[i]); } if (spa->spa_l2cache.sav_vdevs) { kmem_free(spa->spa_l2cache.sav_vdevs, spa->spa_l2cache.sav_count * sizeof (void *)); spa->spa_l2cache.sav_vdevs = NULL; } if (spa->spa_l2cache.sav_config) { nvlist_free(spa->spa_l2cache.sav_config); spa->spa_l2cache.sav_config = NULL; } spa->spa_l2cache.sav_count = 0; spa->spa_async_suspended = 0; spa->spa_indirect_vdevs_loaded = B_FALSE; if (spa->spa_comment != NULL) { spa_strfree(spa->spa_comment); spa->spa_comment = NULL; } spa_config_exit(spa, SCL_ALL, spa); } /* * Load (or re-load) the current list of vdevs describing the active spares for * this pool. When this is called, we have some form of basic information in * 'spa_spares.sav_config'. We parse this into vdevs, try to open them, and * then re-generate a more complete list including status information. */ void spa_load_spares(spa_t *spa) { nvlist_t **spares; uint_t nspares; int i; vdev_t *vd, *tvd; #ifndef _KERNEL /* * zdb opens both the current state of the pool and the * checkpointed state (if present), with a different spa_t. * * As spare vdevs are shared among open pools, we skip loading * them when we load the checkpointed state of the pool. */ if (!spa_writeable(spa)) return; #endif ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); /* * First, close and free any existing spare vdevs. */ for (i = 0; i < spa->spa_spares.sav_count; i++) { vd = spa->spa_spares.sav_vdevs[i]; /* Undo the call to spa_activate() below */ if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, B_FALSE)) != NULL && tvd->vdev_isspare) spa_spare_remove(tvd); vdev_close(vd); vdev_free(vd); } if (spa->spa_spares.sav_vdevs) kmem_free(spa->spa_spares.sav_vdevs, spa->spa_spares.sav_count * sizeof (void *)); if (spa->spa_spares.sav_config == NULL) nspares = 0; else VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); spa->spa_spares.sav_count = (int)nspares; spa->spa_spares.sav_vdevs = NULL; if (nspares == 0) return; /* * Construct the array of vdevs, opening them to get status in the * process. For each spare, there are potentially two different vdev_t * structures associated with it: one in the list of spares (used only * for basic validation purposes) and one in the active vdev * configuration (if it's spared in). During this phase we open and * validate each vdev on the spare list. If the vdev also exists in the * active configuration, then we also mark this vdev as an active spare.
*/ spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *), KM_SLEEP); for (i = 0; i < spa->spa_spares.sav_count; i++) { VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, VDEV_ALLOC_SPARE) == 0); ASSERT(vd != NULL); spa->spa_spares.sav_vdevs[i] = vd; if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, B_FALSE)) != NULL) { if (!tvd->vdev_isspare) spa_spare_add(tvd); /* * We only mark the spare active if we were successfully * able to load the vdev. Otherwise, importing a pool * with a bad active spare would result in strange * behavior, because multiple pools would think the spare * is actively in use. * * There is a vulnerability here to an equally bizarre * circumstance, where a dead active spare is later * brought back to life (onlined or otherwise). Given * the rarity of this scenario, and the extra complexity * it adds, we ignore the possibility. */ if (!vdev_is_dead(tvd)) spa_spare_activate(tvd); } vd->vdev_top = vd; vd->vdev_aux = &spa->spa_spares; if (vdev_open(vd) != 0) continue; if (vdev_validate_aux(vd) == 0) spa_spare_add(vd); } /* * Recompute the stashed list of spares, with status information * this time. */ VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), KM_SLEEP); for (i = 0; i < spa->spa_spares.sav_count; i++) spares[i] = vdev_config_generate(spa, spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0); for (i = 0; i < spa->spa_spares.sav_count; i++) nvlist_free(spares[i]); kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *)); } /* * Load (or re-load) the current list of vdevs describing the active l2cache for * this pool. When this is called, we have some form of basic information in * 'spa_l2cache.sav_config'. We parse this into vdevs, try to open them, and * then re-generate a more complete list including status information. * Devices which are already active have their details maintained, and are * not re-opened. */ void spa_load_l2cache(spa_t *spa) { nvlist_t **l2cache; uint_t nl2cache; int i, j, oldnvdevs; uint64_t guid; vdev_t *vd, **oldvdevs, **newvdevs; spa_aux_vdev_t *sav = &spa->spa_l2cache; #ifndef _KERNEL /* * zdb opens both the current state of the pool and the * checkpointed state (if present), with a different spa_t. * * As L2 caches are part of the ARC which is shared among open * pools, we skip loading them when we load the checkpointed * state of the pool. */ if (!spa_writeable(spa)) return; #endif ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); if (sav->sav_config != NULL) { VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); } else { nl2cache = 0; newvdevs = NULL; } oldvdevs = sav->sav_vdevs; oldnvdevs = sav->sav_count; sav->sav_vdevs = NULL; sav->sav_count = 0; /* * Process new nvlist of vdevs. */ for (i = 0; i < nl2cache; i++) { VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, &guid) == 0); newvdevs[i] = NULL; for (j = 0; j < oldnvdevs; j++) { vd = oldvdevs[j]; if (vd != NULL && guid == vd->vdev_guid) { /* * Retain previous vdev for add/remove ops.
*/ newvdevs[i] = vd; oldvdevs[j] = NULL; break; } } if (newvdevs[i] == NULL) { /* * Create new vdev */ VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0, VDEV_ALLOC_L2CACHE) == 0); ASSERT(vd != NULL); newvdevs[i] = vd; /* * Commit this vdev as an l2cache device, * even if it fails to open. */ spa_l2cache_add(vd); vd->vdev_top = vd; vd->vdev_aux = sav; spa_l2cache_activate(vd); if (vdev_open(vd) != 0) continue; (void) vdev_validate_aux(vd); if (!vdev_is_dead(vd)) l2arc_add_vdev(spa, vd); } } /* * Purge vdevs that were dropped */ for (i = 0; i < oldnvdevs; i++) { uint64_t pool; vd = oldvdevs[i]; if (vd != NULL) { ASSERT(vd->vdev_isl2cache); if (spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL && l2arc_vdev_present(vd)) l2arc_remove_vdev(vd); vdev_clear_stats(vd); vdev_free(vd); } } if (oldvdevs) kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); if (sav->sav_config == NULL) goto out; sav->sav_vdevs = newvdevs; sav->sav_count = (int)nl2cache; /* * Recompute the stashed list of l2cache devices, with status * information this time. */ VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); for (i = 0; i < sav->sav_count; i++) l2cache[i] = vdev_config_generate(spa, sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); VERIFY(nvlist_add_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0); out: for (i = 0; i < sav->sav_count; i++) nvlist_free(l2cache[i]); if (sav->sav_count) kmem_free(l2cache, sav->sav_count * sizeof (void *)); } static int load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) { dmu_buf_t *db; char *packed = NULL; size_t nvsize = 0; int error; *value = NULL; error = dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db); if (error != 0) return (error); nvsize = *(uint64_t *)db->db_data; dmu_buf_rele(db, FTAG); packed = kmem_alloc(nvsize, KM_SLEEP); error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, DMU_READ_PREFETCH); if (error == 0) error = nvlist_unpack(packed, nvsize, value, 0); kmem_free(packed, nvsize); return (error); } /* * Concrete top-level vdevs that are not missing and are not logs. At every * spa_sync we write new uberblocks to at least SPA_SYNC_MIN_VDEVS core tvds. */ static uint64_t spa_healthy_core_tvds(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; uint64_t tvds = 0; for (uint64_t i = 0; i < rvd->vdev_children; i++) { vdev_t *vd = rvd->vdev_child[i]; if (vd->vdev_islog) continue; if (vdev_is_concrete(vd) && !vdev_is_dead(vd)) tvds++; } return (tvds); } /* * Checks to see if the given vdev could not be opened, in which case we post a * sysevent to notify the autoreplace code that the device has been removed. */ static void spa_check_removed(vdev_t *vd) { for (uint64_t c = 0; c < vd->vdev_children; c++) spa_check_removed(vd->vdev_child[c]); if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) && vdev_is_concrete(vd)) { zfs_post_autoreplace(vd->vdev_spa, vd); spa_event_notify(vd->vdev_spa, vd, NULL, ESC_ZFS_VDEV_CHECK); } } static int spa_check_for_missing_logs(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; /* * If we're doing a normal import, then build up any additional * diagnostic information about missing log devices. * We'll pass this up to the user for further processing. 
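spa_load_l2cache() above reconciles the new config against the previously opened devices: a guid that reappears keeps its old vdev_t (and is cleared from the old array so the purge pass skips it); unmatched guids get a fresh vdev; whatever remains in the old array is dropped. A standalone sketch of that retain/create/purge pattern with invented types:

#include <stdio.h>
#include <stdlib.h>

struct dev { unsigned long long guid; int open_count; };

int main(void)
{
	struct dev *oldv[3];
	struct dev *newv[3] = { NULL, NULL, NULL };
	unsigned long long wanted[3] = { 10, 99, 30 };	/* new config */

	for (int i = 0; i < 3; i++) {
		oldv[i] = calloc(1, sizeof (struct dev));
		oldv[i]->guid = 10ULL * (i + 1);	/* 10, 20, 30 */
		oldv[i]->open_count = 1;
	}
	for (int i = 0; i < 3; i++) {
		for (int j = 0; j < 3; j++)
			if (oldv[j] != NULL && oldv[j]->guid == wanted[i]) {
				newv[i] = oldv[j];	/* retain */
				oldv[j] = NULL;
				break;
			}
		if (newv[i] == NULL) {			/* create */
			newv[i] = calloc(1, sizeof (struct dev));
			newv[i]->guid = wanted[i];
			newv[i]->open_count = 1;
		}
	}
	for (int j = 0; j < 3; j++)			/* purge dropped */
		if (oldv[j] != NULL) {
			printf("dropped guid %llu\n", oldv[j]->guid);
			free(oldv[j]);
		}
	for (int i = 0; i < 3; i++) {
		printf("active guid %llu\n", newv[i]->guid);
		free(newv[i]);
	}
	return 0;
}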
*/ if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) { nvlist_t **child, *nv; uint64_t idx = 0; child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **), KM_SLEEP); VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); for (uint64_t c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; /* * We consider a device as missing only if it failed * to open (i.e. offline or faulted is not considered * as missing). */ if (tvd->vdev_islog && tvd->vdev_state == VDEV_STATE_CANT_OPEN) { child[idx++] = vdev_config_generate(spa, tvd, B_FALSE, VDEV_CONFIG_MISSING); } } if (idx > 0) { fnvlist_add_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, child, idx); fnvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_MISSING_DEVICES, nv); for (uint64_t i = 0; i < idx; i++) nvlist_free(child[i]); } nvlist_free(nv); kmem_free(child, rvd->vdev_children * sizeof (char **)); if (idx > 0) { spa_load_failed(spa, "some log devices are missing"); vdev_dbgmsg_print_tree(rvd, 2); return (SET_ERROR(ENXIO)); } } else { for (uint64_t c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; if (tvd->vdev_islog && tvd->vdev_state == VDEV_STATE_CANT_OPEN) { spa_set_log_state(spa, SPA_LOG_CLEAR); spa_load_note(spa, "some log devices are " "missing, ZIL is dropped."); vdev_dbgmsg_print_tree(rvd, 2); break; } } } return (0); } /* * Check for missing log devices */ static boolean_t spa_check_logs(spa_t *spa) { boolean_t rv = B_FALSE; dsl_pool_t *dp = spa_get_dsl(spa); switch (spa->spa_log_state) { case SPA_LOG_MISSING: /* need to recheck in case slog has been restored */ case SPA_LOG_UNKNOWN: rv = (dmu_objset_find_dp(dp, dp->dp_root_dir_obj, zil_check_log_chain, NULL, DS_FIND_CHILDREN) != 0); if (rv) spa_set_log_state(spa, SPA_LOG_MISSING); break; } return (rv); } static boolean_t spa_passivate_log(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; boolean_t slog_found = B_FALSE; ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); if (!spa_has_slogs(spa)) return (B_FALSE); for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; metaslab_group_t *mg = tvd->vdev_mg; if (tvd->vdev_islog) { metaslab_group_passivate(mg); slog_found = B_TRUE; } } return (slog_found); } static void spa_activate_log(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER)); for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; metaslab_group_t *mg = tvd->vdev_mg; if (tvd->vdev_islog) metaslab_group_activate(mg); } } int spa_reset_logs(spa_t *spa) { int error; error = dmu_objset_find(spa_name(spa), zil_reset, NULL, DS_FIND_CHILDREN); if (error == 0) { /* * We successfully offlined the log device, sync out the * current txg so that the "stubby" block can be removed * by zil_sync(). 
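spa_check_for_missing_logs() above uses a collect-then-report shape: gather every log child that failed to open into a scratch array, and only attach the diagnostic list when something was collected. A minimal standalone model of that pattern (the device names are invented):

#include <stdio.h>
#include <stdlib.h>

struct tvd { const char *name; int islog; int cant_open; };

int main(void)
{
	struct tvd children[] = {
		{ "mirror-0", 0, 0 },
		{ "log-0",    1, 1 },	/* a missing log device */
		{ "log-1",    1, 0 },
	};
	size_t n = sizeof (children) / sizeof (children[0]);
	const char **missing = calloc(n, sizeof (char *));
	size_t idx = 0;

	for (size_t c = 0; c < n; c++)
		if (children[c].islog && children[c].cant_open)
			missing[idx++] = children[c].name;
	if (idx > 0) {		/* only report when something is wrong */
		printf("%zu log device(s) missing:\n", idx);
		for (size_t i = 0; i < idx; i++)
			printf("  %s\n", missing[i]);
	}
	free(missing);
	return 0;
}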
*/ txg_wait_synced(spa->spa_dsl_pool, 0); } return (error); } static void spa_aux_check_removed(spa_aux_vdev_t *sav) { int i; for (i = 0; i < sav->sav_count; i++) spa_check_removed(sav->sav_vdevs[i]); } void spa_claim_notify(zio_t *zio) { spa_t *spa = zio->io_spa; if (zio->io_error) return; mutex_enter(&spa->spa_props_lock); /* any mutex will do */ if (spa->spa_claim_max_txg < zio->io_bp->blk_birth) spa->spa_claim_max_txg = zio->io_bp->blk_birth; mutex_exit(&spa->spa_props_lock); } typedef struct spa_load_error { uint64_t sle_meta_count; uint64_t sle_data_count; } spa_load_error_t; static void spa_load_verify_done(zio_t *zio) { blkptr_t *bp = zio->io_bp; spa_load_error_t *sle = zio->io_private; dmu_object_type_t type = BP_GET_TYPE(bp); int error = zio->io_error; spa_t *spa = zio->io_spa; abd_free(zio->io_abd); if (error) { if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && type != DMU_OT_INTENT_LOG) atomic_inc_64(&sle->sle_meta_count); else atomic_inc_64(&sle->sle_data_count); } mutex_enter(&spa->spa_scrub_lock); spa->spa_load_verify_ios--; cv_broadcast(&spa->spa_scrub_io_cv); mutex_exit(&spa->spa_scrub_lock); } /* * Maximum number of concurrent scrub i/os to create while verifying * a pool while importing it. */ int spa_load_verify_maxinflight = 10000; boolean_t spa_load_verify_metadata = B_TRUE; boolean_t spa_load_verify_data = B_TRUE; SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_load_verify_maxinflight, CTLFLAG_RWTUN, &spa_load_verify_maxinflight, 0, "Maximum number of concurrent scrub I/Os to create while verifying a " "pool while importing it"); SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_load_verify_metadata, CTLFLAG_RWTUN, &spa_load_verify_metadata, 0, "Check metadata on import?"); SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_load_verify_data, CTLFLAG_RWTUN, &spa_load_verify_data, 0, "Check user data on import?"); /*ARGSUSED*/ static int spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) return (0); /* * Note: normally this routine will not be called if * spa_load_verify_metadata is not set. However, it may be useful * to manually set the flag after the traversal has begun. 
*/ if (!spa_load_verify_metadata) return (0); if (!BP_IS_METADATA(bp) && !spa_load_verify_data) return (0); zio_t *rio = arg; size_t size = BP_GET_PSIZE(bp); mutex_enter(&spa->spa_scrub_lock); while (spa->spa_load_verify_ios >= spa_load_verify_maxinflight) cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); spa->spa_load_verify_ios++; mutex_exit(&spa->spa_scrub_lock); zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size, spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); return (0); } /* ARGSUSED */ int verify_dataset_name_len(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { if (dsl_dataset_namelen(ds) >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); return (0); } static int spa_load_verify(spa_t *spa) { zio_t *rio; spa_load_error_t sle = { 0 }; zpool_load_policy_t policy; boolean_t verify_ok = B_FALSE; int error = 0; zpool_get_load_policy(spa->spa_config, &policy); if (policy.zlp_rewind & ZPOOL_NEVER_REWIND) return (0); dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); error = dmu_objset_find_dp(spa->spa_dsl_pool, spa->spa_dsl_pool->dp_root_dir_obj, verify_dataset_name_len, NULL, DS_FIND_CHILDREN); dsl_pool_config_exit(spa->spa_dsl_pool, FTAG); if (error != 0) return (error); rio = zio_root(spa, NULL, &sle, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE); if (spa_load_verify_metadata) { if (spa->spa_extreme_rewind) { spa_load_note(spa, "performing a complete scan of the " "pool since extreme rewind is on. This may take " "a very long time.\n (spa_load_verify_data=%u, " "spa_load_verify_metadata=%u)", spa_load_verify_data, spa_load_verify_metadata); } error = traverse_pool(spa, spa->spa_verify_min_txg, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, spa_load_verify_cb, rio); } (void) zio_wait(rio); spa->spa_load_meta_errors = sle.sle_meta_count; spa->spa_load_data_errors = sle.sle_data_count; if (sle.sle_meta_count != 0 || sle.sle_data_count != 0) { spa_load_note(spa, "spa_load_verify found %llu metadata errors " "and %llu data errors", (u_longlong_t)sle.sle_meta_count, (u_longlong_t)sle.sle_data_count); } if (spa_load_verify_dryrun || (!error && sle.sle_meta_count <= policy.zlp_maxmeta && sle.sle_data_count <= policy.zlp_maxdata)) { int64_t loss = 0; verify_ok = B_TRUE; spa->spa_load_txg = spa->spa_uberblock.ub_txg; spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp; loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts; VERIFY(nvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0); VERIFY(nvlist_add_int64(spa->spa_load_info, ZPOOL_CONFIG_REWIND_TIME, loss) == 0); VERIFY(nvlist_add_uint64(spa->spa_load_info, ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0); } else { spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; } if (spa_load_verify_dryrun) return (0); if (error) { if (error != ENXIO && error != EIO) error = SET_ERROR(EIO); return (error); } return (verify_ok ? 0 : EIO); } /* * Find a value in the pool props object. */ static void spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val) { (void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object, zpool_prop_to_name(prop), sizeof (uint64_t), 1, val); } /* * Find a value in the pool directory object. 
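*
* (The in-flight throttle in spa_load_verify_cb() and
* spa_load_verify_done() above is a counting gate built from a mutex
* and a condition variable. A self-contained pthreads rendering of the
* same idiom, with hypothetical names:)
*
*	#include <pthread.h>
*
*	static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
*	static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
*	static int inflight;
*
*	void
*	io_start(int maxinflight)
*	{
*		pthread_mutex_lock(&lk);
*		while (inflight >= maxinflight)
*			pthread_cond_wait(&cv, &lk);
*		inflight++;
*		pthread_mutex_unlock(&lk);
*	}
*
*	void
*	io_done(void)
*	{
*		pthread_mutex_lock(&lk);
*		inflight--;
*		pthread_cond_broadcast(&cv);
*		pthread_mutex_unlock(&lk);
*	}
*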
*/ static int spa_dir_prop(spa_t *spa, const char *name, uint64_t *val, boolean_t log_enoent) { int error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, name, sizeof (uint64_t), 1, val); if (error != 0 && (error != ENOENT || log_enoent)) { spa_load_failed(spa, "couldn't get '%s' value in MOS directory " "[error=%d]", name, error); } return (error); } static int spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err) { vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux); return (SET_ERROR(err)); } static void spa_spawn_aux_threads(spa_t *spa) { ASSERT(spa_writeable(spa)); ASSERT(MUTEX_HELD(&spa_namespace_lock)); spa_start_indirect_condensing_thread(spa); ASSERT3P(spa->spa_checkpoint_discard_zthr, ==, NULL); spa->spa_checkpoint_discard_zthr = zthr_create(spa_checkpoint_discard_thread_check, spa_checkpoint_discard_thread, spa); } /* * Fix up config after a partly-completed split. This is done with the * ZPOOL_CONFIG_SPLIT nvlist. Both the splitting pool and the split-off * pool have that entry in their config, but only the splitting one contains * a list of all the guids of the vdevs that are being split off. * * This function determines what to do with that list: either rejoin * all the disks to the pool, or complete the splitting process. To attempt * the rejoin, each disk that is offlined is marked online again, and * we do a reopen() call. If the vdev label for every disk that was * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL) * then we call vdev_split() on each disk, and complete the split. * * Otherwise we leave the config alone, with all the vdevs in place in * the original pool. */ static void spa_try_repair(spa_t *spa, nvlist_t *config) { uint_t extracted; uint64_t *glist; uint_t i, gcount; nvlist_t *nvl; vdev_t **vd; boolean_t attempt_reopen; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0) return; /* check that the config is complete */ if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, &glist, &gcount) != 0) return; vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP); /* attempt to online all the vdevs & validate */ attempt_reopen = B_TRUE; for (i = 0; i < gcount; i++) { if (glist[i] == 0) /* vdev is hole */ continue; vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE); if (vd[i] == NULL) { /* * Don't bother attempting to reopen the disks; * just do the split. */ attempt_reopen = B_FALSE; } else { /* attempt to re-online it */ vd[i]->vdev_offline = B_FALSE; } } if (attempt_reopen) { vdev_reopen(spa->spa_root_vdev); /* check each device to see what state it's in */ for (extracted = 0, i = 0; i < gcount; i++) { if (vd[i] != NULL && vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL) break; ++extracted; } } /* * If every disk has been moved to the new pool, or if we never * even attempted to look at them, then we split them off for * good. */ if (!attempt_reopen || gcount == extracted) { for (i = 0; i < gcount; i++) if (vd[i] != NULL) vdev_split(vd[i]); vdev_reopen(spa->spa_root_vdev); } kmem_free(vd, gcount * sizeof (vdev_t *)); } static int spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type) { char *ereport = FM_EREPORT_ZFS_POOL; int error; spa->spa_load_state = state; gethrestime(&spa->spa_loaded_ts); error = spa_load_impl(spa, type, &ereport); /* * Don't count references from objsets that are already closed * and are making their way through the eviction process. 
*/ spa_evicting_os_wait(spa); spa->spa_minref = refcount_count(&spa->spa_refcount); if (error) { if (error != EEXIST) { spa->spa_loaded_ts.tv_sec = 0; spa->spa_loaded_ts.tv_nsec = 0; } if (error != EBADF) { zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0); } } spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE; spa->spa_ena = 0; return (error); } /* * Count the number of per-vdev ZAPs associated with all of the vdevs in the * vdev tree rooted in the given vd, and ensure that each ZAP is present in the * spa's per-vdev ZAP list. */ static uint64_t vdev_count_verify_zaps(vdev_t *vd) { spa_t *spa = vd->vdev_spa; uint64_t total = 0; if (vd->vdev_top_zap != 0) { total++; ASSERT0(zap_lookup_int(spa->spa_meta_objset, spa->spa_all_vdev_zaps, vd->vdev_top_zap)); } if (vd->vdev_leaf_zap != 0) { total++; ASSERT0(zap_lookup_int(spa->spa_meta_objset, spa->spa_all_vdev_zaps, vd->vdev_leaf_zap)); } for (uint64_t i = 0; i < vd->vdev_children; i++) { total += vdev_count_verify_zaps(vd->vdev_child[i]); } return (total); } static int spa_verify_host(spa_t *spa, nvlist_t *mos_config) { uint64_t hostid; char *hostname; uint64_t myhostid = 0; if (!spa_is_root(spa) && nvlist_lookup_uint64(mos_config, ZPOOL_CONFIG_HOSTID, &hostid) == 0) { hostname = fnvlist_lookup_string(mos_config, ZPOOL_CONFIG_HOSTNAME); myhostid = zone_get_hostid(NULL); if (hostid != 0 && myhostid != 0 && hostid != myhostid) { cmn_err(CE_WARN, "pool '%s' could not be " "loaded as it was last accessed by " "another system (host: %s hostid: 0x%llx). " "See: http://illumos.org/msg/ZFS-8000-EY", spa_name(spa), hostname, (u_longlong_t)hostid); spa_load_failed(spa, "hostid verification failed: pool " "last accessed by host: %s (hostid: 0x%llx)", hostname, (u_longlong_t)hostid); return (SET_ERROR(EBADF)); } } return (0); } static int spa_ld_parse_config(spa_t *spa, spa_import_type_t type) { int error = 0; nvlist_t *nvtree, *nvl, *config = spa->spa_config; int parse; vdev_t *rvd; uint64_t pool_guid; char *comment; /* * Versioning wasn't explicitly added to the label until later, so if * it's not present treat it as the initial version. */ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &spa->spa_ubsync.ub_version) != 0) spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) { spa_load_failed(spa, "invalid config provided: '%s' missing", ZPOOL_CONFIG_POOL_GUID); return (SET_ERROR(EINVAL)); } /* * If we are doing an import, ensure that the pool is not already * imported by checking if its pool guid already exists in the * spa namespace. * * The only case that we allow an already imported pool to be * imported again, is when the pool is checkpointed and we want to * look at its checkpointed state from userland tools like zdb. 
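*
* (spa_verify_host() above refuses pools last written by a different
* live host. A userland sketch of the same test, with gethostid(3)
* standing in for zone_get_hostid(); check_owner is a hypothetical
* name:)
*
*	#include <errno.h>
*	#include <unistd.h>
*
*	int
*	check_owner(uint64_t pool_hostid)
*	{
*		uint64_t myhostid = (uint32_t)gethostid();
*
*		if (pool_hostid != 0 && myhostid != 0 &&
*		    pool_hostid != myhostid)
*			return (EBADF);
*		return (0);
*	}
*/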
*/ #ifdef _KERNEL if ((spa->spa_load_state == SPA_LOAD_IMPORT || spa->spa_load_state == SPA_LOAD_TRYIMPORT) && spa_guid_exists(pool_guid, 0)) { #else if ((spa->spa_load_state == SPA_LOAD_IMPORT || spa->spa_load_state == SPA_LOAD_TRYIMPORT) && spa_guid_exists(pool_guid, 0) && !spa_importing_readonly_checkpoint(spa)) { #endif spa_load_failed(spa, "a pool with guid %llu is already open", (u_longlong_t)pool_guid); return (SET_ERROR(EEXIST)); } spa->spa_config_guid = pool_guid; nvlist_free(spa->spa_load_info); spa->spa_load_info = fnvlist_alloc(); ASSERT(spa->spa_comment == NULL); if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) spa->spa_comment = spa_strdup(comment); (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &spa->spa_config_txg); if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) == 0) spa->spa_config_splitting = fnvlist_dup(nvl); if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtree)) { spa_load_failed(spa, "invalid config provided: '%s' missing", ZPOOL_CONFIG_VDEV_TREE); return (SET_ERROR(EINVAL)); } /* * Create "The Godfather" zio to hold all async IOs */ spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); for (int i = 0; i < max_ncpus; i++) { spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); } /* * Parse the configuration into a vdev tree. We explicitly set the * value that will be returned by spa_version() since parsing the * configuration requires knowing the version number. */ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); parse = (type == SPA_IMPORT_EXISTING ? VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT); error = spa_config_parse(spa, &rvd, nvtree, NULL, 0, parse); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) { spa_load_failed(spa, "unable to parse config [error=%d]", error); return (error); } ASSERT(spa->spa_root_vdev == rvd); ASSERT3U(spa->spa_min_ashift, >=, SPA_MINBLOCKSHIFT); ASSERT3U(spa->spa_max_ashift, <=, SPA_MAXBLOCKSHIFT); if (type != SPA_IMPORT_ASSEMBLE) { ASSERT(spa_guid(spa) == pool_guid); } return (0); } /* * Recursively open all vdevs in the vdev tree. This function is called twice: * first with the untrusted config, then with the trusted config. */ static int spa_ld_open_vdevs(spa_t *spa) { int error = 0; /* * spa_missing_tvds_allowed defines how many top-level vdevs can be * missing/unopenable for the root vdev to be still considered openable. */ if (spa->spa_trust_config) { spa->spa_missing_tvds_allowed = zfs_max_missing_tvds; } else if (spa->spa_config_source == SPA_CONFIG_SRC_CACHEFILE) { spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_cachefile; } else if (spa->spa_config_source == SPA_CONFIG_SRC_SCAN) { spa->spa_missing_tvds_allowed = zfs_max_missing_tvds_scan; } else { spa->spa_missing_tvds_allowed = 0; } spa->spa_missing_tvds_allowed = MAX(zfs_max_missing_tvds, spa->spa_missing_tvds_allowed); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = vdev_open(spa->spa_root_vdev); spa_config_exit(spa, SCL_ALL, FTAG); if (spa->spa_missing_tvds != 0) { spa_load_note(spa, "vdev tree has %lld missing top-level " "vdevs.", (u_longlong_t)spa->spa_missing_tvds); if (spa->spa_trust_config && (spa->spa_mode & FWRITE)) { /* * Although theoretically we could allow users to open * incomplete pools in RW mode, we'd need to add a lot * of extra logic (e.g. adjust pool space to account * for missing vdevs). 
* This limitation also prevents users from accidentally * opening the pool in RW mode during data recovery and * damaging it further. */ spa_load_note(spa, "pools with missing top-level " "vdevs can only be opened in read-only mode."); error = SET_ERROR(ENXIO); } else { spa_load_note(spa, "current settings allow for maximum " "%lld missing top-level vdevs at this stage.", (u_longlong_t)spa->spa_missing_tvds_allowed); } } if (error != 0) { spa_load_failed(spa, "unable to open vdev tree [error=%d]", error); } if (spa->spa_missing_tvds != 0 || error != 0) vdev_dbgmsg_print_tree(spa->spa_root_vdev, 2); return (error); } /* * We need to validate the vdev labels against the configuration that * we have in hand. This function is called twice: first with an untrusted * config, then with a trusted config. The validation is more strict when the * config is trusted. */ static int spa_ld_validate_vdevs(spa_t *spa) { int error = 0; vdev_t *rvd = spa->spa_root_vdev; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = vdev_validate(rvd); spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) { spa_load_failed(spa, "vdev_validate failed [error=%d]", error); return (error); } if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) { spa_load_failed(spa, "cannot open vdev tree after invalidating " "some vdevs"); vdev_dbgmsg_print_tree(rvd, 2); return (SET_ERROR(ENXIO)); } return (0); } static void spa_ld_select_uberblock_done(spa_t *spa, uberblock_t *ub) { spa->spa_state = POOL_STATE_ACTIVE; spa->spa_ubsync = spa->spa_uberblock; spa->spa_verify_min_txg = spa->spa_extreme_rewind ? TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1; spa->spa_first_txg = spa->spa_last_ubsync_txg ? spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1; spa->spa_claim_max_txg = spa->spa_first_txg; spa->spa_prev_software_version = ub->ub_software_version; } static int spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type) { vdev_t *rvd = spa->spa_root_vdev; nvlist_t *label; uberblock_t *ub = &spa->spa_uberblock; /* * If we are opening the checkpointed state of the pool by * rewinding to it, at this point we will have written the * checkpointed uberblock to the vdev labels, so searching * the labels will find the right uberblock. However, if * we are opening the checkpointed state read-only, we have * not modified the labels. Therefore, we must ignore the * labels and continue using the spa_uberblock that was set * by spa_ld_checkpoint_rewind. * * Note that it would be fine to ignore the labels when * rewinding (opening writeable) as well. However, if we * crash just after writing the labels, we will end up * searching the labels. Doing so in the common case means * that this code path gets exercised normally, rather than * just in the edge case. */ if (ub->ub_checkpoint_txg != 0 && spa_importing_readonly_checkpoint(spa)) { spa_ld_select_uberblock_done(spa, ub); return (0); } /* * Find the best uberblock. */ vdev_uberblock_load(rvd, ub, &label); /* * If we weren't able to find a single valid uberblock, return failure. */ if (ub->ub_txg == 0) { nvlist_free(label); spa_load_failed(spa, "no valid uberblock found"); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); } spa_load_note(spa, "using uberblock with txg=%llu", (u_longlong_t)ub->ub_txg); /* * If the pool has an unsupported version we can't open it. 
*/ if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) { nvlist_free(label); spa_load_failed(spa, "version %llu is not supported", (u_longlong_t)ub->ub_version); return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP)); } if (ub->ub_version >= SPA_VERSION_FEATURES) { nvlist_t *features; /* * If we weren't able to find what's necessary for reading the * MOS in the label, return failure. */ if (label == NULL) { spa_load_failed(spa, "label config unavailable"); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); } if (nvlist_lookup_nvlist(label, ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) { nvlist_free(label); spa_load_failed(spa, "invalid label: '%s' missing", ZPOOL_CONFIG_FEATURES_FOR_READ); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO)); } /* * Update our in-core representation with the definitive values * from the label. */ nvlist_free(spa->spa_label_features); VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); } nvlist_free(label); /* * Look through entries in the label nvlist's features_for_read. If * there is a feature listed there which we don't understand then we * cannot open a pool. */ if (ub->ub_version >= SPA_VERSION_FEATURES) { nvlist_t *unsup_feat; VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) == 0); for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features, NULL); nvp != NULL; nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) { if (!zfeature_is_supported(nvpair_name(nvp))) { VERIFY(nvlist_add_string(unsup_feat, nvpair_name(nvp), "") == 0); } } if (!nvlist_empty(unsup_feat)) { VERIFY(nvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0); nvlist_free(unsup_feat); spa_load_failed(spa, "some features are unsupported"); return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP)); } nvlist_free(unsup_feat); } if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_try_repair(spa, spa->spa_config); spa_config_exit(spa, SCL_ALL, FTAG); nvlist_free(spa->spa_config_splitting); spa->spa_config_splitting = NULL; } /* * Initialize internal SPA structures. */ spa_ld_select_uberblock_done(spa, ub); return (0); } static int spa_ld_open_rootbp(spa_t *spa) { int error = 0; vdev_t *rvd = spa->spa_root_vdev; error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool); if (error != 0) { spa_load_failed(spa, "unable to open rootbp in dsl_pool_init " "[error=%d]", error); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset; return (0); } static int spa_ld_trusted_config(spa_t *spa, spa_import_type_t type, boolean_t reloading) { vdev_t *mrvd, *rvd = spa->spa_root_vdev; nvlist_t *nv, *mos_config, *policy; int error = 0, copy_error; uint64_t healthy_tvds, healthy_tvds_mos; uint64_t mos_config_txg; if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object, B_TRUE) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * If we're assembling a pool from a split, the config provided is * already trusted so there is nothing to do. */ if (type == SPA_IMPORT_ASSEMBLE) return (0); healthy_tvds = spa_healthy_core_tvds(spa); if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0) { spa_load_failed(spa, "unable to retrieve MOS config"); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } /* * If we are doing an open, pool owner wasn't verified yet, thus do * the verification here. 
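*
* (For reference, the features_for_read walk in spa_ld_select_uberblock()
* above uses the standard nvlist iteration idiom; in a userland tool the
* same loop would look like this sketch, where label_features is a
* stand-in name:)
*
*	nvpair_t *nvp = NULL;
*
*	while ((nvp = nvlist_next_nvpair(label_features, nvp)) != NULL) {
*		if (!zfeature_is_supported(nvpair_name(nvp)))
*			(void) printf("unsupported: %s\n",
*			    nvpair_name(nvp));
*	}
*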
*/ if (spa->spa_load_state == SPA_LOAD_OPEN) { error = spa_verify_host(spa, mos_config); if (error != 0) { nvlist_free(mos_config); return (error); } } nv = fnvlist_lookup_nvlist(mos_config, ZPOOL_CONFIG_VDEV_TREE); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); /* * Build a new vdev tree from the trusted config */ VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0); /* * Vdev paths in the MOS may be obsolete. If the untrusted config was * obtained by scanning /dev/dsk, then it will have the right vdev * paths. We update the trusted MOS config with this information. * We first try to copy the paths with vdev_copy_path_strict, which * succeeds only when both configs have exactly the same vdev tree. * If that fails, we fall back to a more flexible method that has a * best effort policy. */ copy_error = vdev_copy_path_strict(rvd, mrvd); if (copy_error != 0 || spa_load_print_vdev_tree) { spa_load_note(spa, "provided vdev tree:"); vdev_dbgmsg_print_tree(rvd, 2); spa_load_note(spa, "MOS vdev tree:"); vdev_dbgmsg_print_tree(mrvd, 2); } if (copy_error != 0) { spa_load_note(spa, "vdev_copy_path_strict failed, falling " "back to vdev_copy_path_relaxed"); vdev_copy_path_relaxed(rvd, mrvd); } vdev_close(rvd); vdev_free(rvd); spa->spa_root_vdev = mrvd; rvd = mrvd; spa_config_exit(spa, SCL_ALL, FTAG); /* * We will use spa_config if we decide to reload the spa or if spa_load * fails and we rewind. We must thus regenerate the config using the * MOS information with the updated paths. ZPOOL_LOAD_POLICY is used to * pass settings on how to load the pool and is not stored in the MOS. * We copy it over to our new, trusted config. */ mos_config_txg = fnvlist_lookup_uint64(mos_config, ZPOOL_CONFIG_POOL_TXG); nvlist_free(mos_config); mos_config = spa_config_generate(spa, NULL, mos_config_txg, B_FALSE); if (nvlist_lookup_nvlist(spa->spa_config, ZPOOL_LOAD_POLICY, &policy) == 0) fnvlist_add_nvlist(mos_config, ZPOOL_LOAD_POLICY, policy); spa_config_set(spa, mos_config); spa->spa_config_source = SPA_CONFIG_SRC_MOS; /* * Now that we got the config from the MOS, we should be more strict * in checking blkptrs and can make assumptions about the consistency * of the vdev tree. spa_trust_config must be set to true before opening * vdevs in order for them to be writeable. */ spa->spa_trust_config = B_TRUE; /* * Open and validate the new vdev tree */ error = spa_ld_open_vdevs(spa); if (error != 0) return (error); error = spa_ld_validate_vdevs(spa); if (error != 0) return (error); if (copy_error != 0 || spa_load_print_vdev_tree) { spa_load_note(spa, "final vdev tree:"); vdev_dbgmsg_print_tree(rvd, 2); } if (spa->spa_load_state != SPA_LOAD_TRYIMPORT && !spa->spa_extreme_rewind && zfs_max_missing_tvds == 0) { /* * Sanity check to make sure that we are indeed loading the * latest uberblock. If we missed SPA_SYNC_MIN_VDEVS tvds * in the config provided and they happened to be the only ones * to have the latest uberblock, we could involuntarily perform * an extreme rewind. */ healthy_tvds_mos = spa_healthy_core_tvds(spa); if (healthy_tvds_mos - healthy_tvds >= SPA_SYNC_MIN_VDEVS) { spa_load_note(spa, "config provided misses too many " "top-level vdevs compared to MOS (%lld vs %lld). ", (u_longlong_t)healthy_tvds, (u_longlong_t)healthy_tvds_mos); spa_load_note(spa, "vdev tree:"); vdev_dbgmsg_print_tree(rvd, 2); if (reloading) { spa_load_failed(spa, "config was already " "provided from MOS. 
Aborting."); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } spa_load_note(spa, "spa must be reloaded using MOS " "config"); return (SET_ERROR(EAGAIN)); } } error = spa_check_for_missing_logs(spa); if (error != 0) return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); if (rvd->vdev_guid_sum != spa->spa_uberblock.ub_guid_sum) { spa_load_failed(spa, "uberblock guid sum doesn't match MOS " "guid sum (%llu != %llu)", (u_longlong_t)spa->spa_uberblock.ub_guid_sum, (u_longlong_t)rvd->vdev_guid_sum); return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO)); } return (0); } static int spa_ld_open_indirect_vdev_metadata(spa_t *spa) { int error = 0; vdev_t *rvd = spa->spa_root_vdev; /* * Everything that we read before spa_remove_init() must be stored * on concreted vdevs. Therefore we do this as early as possible. */ error = spa_remove_init(spa); if (error != 0) { spa_load_failed(spa, "spa_remove_init failed [error=%d]", error); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } /* * Retrieve information needed to condense indirect vdev mappings. */ error = spa_condense_init(spa); if (error != 0) { spa_load_failed(spa, "spa_condense_init failed [error=%d]", error); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); } return (0); } static int spa_ld_check_features(spa_t *spa, boolean_t *missing_feat_writep) { int error = 0; vdev_t *rvd = spa->spa_root_vdev; if (spa_version(spa) >= SPA_VERSION_FEATURES) { boolean_t missing_feat_read = B_FALSE; nvlist_t *unsup_feat, *enabled_feat; if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ, &spa->spa_feat_for_read_obj, B_TRUE) != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE, &spa->spa_feat_for_write_obj, B_TRUE) != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS, &spa->spa_feat_desc_obj, B_TRUE) != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } enabled_feat = fnvlist_alloc(); unsup_feat = fnvlist_alloc(); if (!spa_features_check(spa, B_FALSE, unsup_feat, enabled_feat)) missing_feat_read = B_TRUE; if (spa_writeable(spa) || spa->spa_load_state == SPA_LOAD_TRYIMPORT) { if (!spa_features_check(spa, B_TRUE, unsup_feat, enabled_feat)) { *missing_feat_writep = B_TRUE; } } fnvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_ENABLED_FEAT, enabled_feat); if (!nvlist_empty(unsup_feat)) { fnvlist_add_nvlist(spa->spa_load_info, ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat); } fnvlist_free(enabled_feat); fnvlist_free(unsup_feat); if (!missing_feat_read) { fnvlist_add_boolean(spa->spa_load_info, ZPOOL_CONFIG_CAN_RDONLY); } /* * If the state is SPA_LOAD_TRYIMPORT, our objective is * twofold: to determine whether the pool is available for * import in read-write mode and (if it is not) whether the * pool is available for import in read-only mode. If the pool * is available for import in read-write mode, it is displayed * as available in userland; if it is not available for import * in read-only mode, it is displayed as unavailable in * userland. If the pool is available for import in read-only * mode but not read-write mode, it is displayed as unavailable * in userland with a special note that the pool is actually * available for open in read-only mode. * * As a result, if the state is SPA_LOAD_TRYIMPORT and we are * missing a feature for write, we must first determine whether * the pool can be opened read-only before returning to * userland in order to know whether to display the * abovementioned note. 
*/ if (missing_feat_read || (*missing_feat_writep && spa_writeable(spa))) { spa_load_failed(spa, "pool uses unsupported features"); return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP)); } /* * Load refcounts for ZFS features from disk into an in-memory * cache during SPA initialization. */ for (spa_feature_t i = 0; i < SPA_FEATURES; i++) { uint64_t refcount; error = feature_get_refcount_from_disk(spa, &spa_feature_table[i], &refcount); if (error == 0) { spa->spa_feat_refcount_cache[i] = refcount; } else if (error == ENOTSUP) { spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED; } else { spa_load_failed(spa, "error getting refcount " "for feature %s [error=%d]", spa_feature_table[i].fi_guid, error); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } } } if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) { if (spa_dir_prop(spa, DMU_POOL_FEATURE_ENABLED_TXG, &spa->spa_feat_enabled_txg_obj, B_TRUE) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } return (0); } static int spa_ld_load_special_directories(spa_t *spa) { int error = 0; vdev_t *rvd = spa->spa_root_vdev; spa->spa_is_initializing = B_TRUE; error = dsl_pool_open(spa->spa_dsl_pool); spa->spa_is_initializing = B_FALSE; if (error != 0) { spa_load_failed(spa, "dsl_pool_open failed [error=%d]", error); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } return (0); } static int spa_ld_get_props(spa_t *spa) { int error = 0; uint64_t obj; vdev_t *rvd = spa->spa_root_vdev; /* Grab the secret checksum salt from the MOS. */ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1, sizeof (spa->spa_cksum_salt.zcs_bytes), spa->spa_cksum_salt.zcs_bytes); if (error == ENOENT) { /* Generate a new salt for subsequent use */ (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, sizeof (spa->spa_cksum_salt.zcs_bytes)); } else if (error != 0) { spa_load_failed(spa, "unable to retrieve checksum salt from " "MOS [error=%d]", error); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj, B_TRUE) != 0) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj); if (error != 0) { spa_load_failed(spa, "error opening deferred-frees bpobj " "[error=%d]", error); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } /* * Load the bit that tells us to use the new accounting function * (raid-z deflation). If we have an older pool, this will not * be present. */ error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate, B_FALSE); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION, &spa->spa_creation_version, B_FALSE); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * Load the persistent error log. If we have an older pool, this will * not be present. */ error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last, B_FALSE); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB, &spa->spa_errlog_scrub, B_FALSE); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * Load the history object. If we have an older pool, this * will not be present. 
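*
* (The checksum-salt logic in spa_ld_get_props() above is a
* read-or-generate pattern: ENOENT means an older pool with no salt, so
* a fresh one is created; any other error fails the load. A compact
* userland analogue, where read_saved_salt() is hypothetical and
* arc4random_buf(3) stands in for random_get_pseudo_bytes():)
*
*	uint8_t salt[32];
*	int error = read_saved_salt(salt, sizeof (salt));
*
*	if (error == ENOENT)
*		arc4random_buf(salt, sizeof (salt));
*	else if (error != 0)
*		return (EIO);
*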
*/ error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history, B_FALSE); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); /* * Load the per-vdev ZAP map. If we have an older pool, this will not * be present; in this case, defer its creation to a later time to * avoid dirtying the MOS this early / out of sync context. See * spa_sync_config_object. */ /* The sentinel is only available in the MOS config. */ nvlist_t *mos_config; if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0) { spa_load_failed(spa, "unable to retrieve MOS config"); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } error = spa_dir_prop(spa, DMU_POOL_VDEV_ZAP_MAP, &spa->spa_all_vdev_zaps, B_FALSE); if (error == ENOENT) { VERIFY(!nvlist_exists(mos_config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)); spa->spa_avz_action = AVZ_ACTION_INITIALIZE; ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); } else if (error != 0) { return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } else if (!nvlist_exists(mos_config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)) { /* * An older version of ZFS overwrote the sentinel value, so * we have orphaned per-vdev ZAPs in the MOS. Defer their * destruction to later; see spa_sync_config_object. */ spa->spa_avz_action = AVZ_ACTION_DESTROY; /* * We're assuming that no vdevs have had their ZAPs created * before this. Better be sure of it. */ ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev)); } nvlist_free(mos_config); spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object, B_FALSE); if (error && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (error == 0) { uint64_t autoreplace; spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs); spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace); spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, &spa->spa_dedup_ditto); spa->spa_autoreplace = (autoreplace != 0); } /* * If we are importing a pool with missing top-level vdevs, * we enforce that the pool doesn't panic or get suspended on * error since the likelihood of missing data is extremely high. */ if (spa->spa_missing_tvds > 0 && spa->spa_failmode != ZIO_FAILURE_MODE_CONTINUE && spa->spa_load_state != SPA_LOAD_TRYIMPORT) { spa_load_note(spa, "forcing failmode to 'continue' " "as some top level vdevs are missing"); spa->spa_failmode = ZIO_FAILURE_MODE_CONTINUE; } return (0); } static int spa_ld_open_aux_vdevs(spa_t *spa, spa_import_type_t type) { int error = 0; vdev_t *rvd = spa->spa_root_vdev; /* * If we're assembling the pool from the split-off vdevs of * an existing pool, we don't want to attach the spares & cache * devices. */ /* * Load any hot spares for this pool. 
*/ error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object, B_FALSE); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { ASSERT(spa_version(spa) >= SPA_VERSION_SPARES); if (load_nvlist(spa, spa->spa_spares.sav_object, &spa->spa_spares.sav_config) != 0) { spa_load_failed(spa, "error loading spares nvlist"); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); } else if (error == 0) { spa->spa_spares.sav_sync = B_TRUE; } /* * Load any level 2 ARC devices for this pool. */ error = spa_dir_prop(spa, DMU_POOL_L2CACHE, &spa->spa_l2cache.sav_object, B_FALSE); if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); if (error == 0 && type != SPA_IMPORT_ASSEMBLE) { ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE); if (load_nvlist(spa, spa->spa_l2cache.sav_object, &spa->spa_l2cache.sav_config) != 0) { spa_load_failed(spa, "error loading l2cache nvlist"); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); } spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_l2cache(spa); spa_config_exit(spa, SCL_ALL, FTAG); } else if (error == 0) { spa->spa_l2cache.sav_sync = B_TRUE; } return (0); } static int spa_ld_load_vdev_metadata(spa_t *spa) { int error = 0; vdev_t *rvd = spa->spa_root_vdev; /* * If the 'autoreplace' property is set, then post a resource notifying * the ZFS DE that it should not issue any faults for unopenable * devices. We also iterate over the vdevs, and post a sysevent for any * unopenable vdevs so that the normal autoreplace handler can take * over. */ if (spa->spa_autoreplace && spa->spa_load_state != SPA_LOAD_TRYIMPORT) { spa_check_removed(spa->spa_root_vdev); /* * For the import case, this is done in spa_import(), because * at this point we're using the spare definitions from * the MOS config, not necessarily from the userland config. */ if (spa->spa_load_state != SPA_LOAD_IMPORT) { spa_aux_check_removed(&spa->spa_spares); spa_aux_check_removed(&spa->spa_l2cache); } } /* * Load the vdev metadata such as metaslabs, DTLs, spacemap object, etc. */ error = vdev_load(rvd); if (error != 0) { spa_load_failed(spa, "vdev_load failed [error=%d]", error); return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, error)); } /* * Propagate the leaf DTLs we just loaded all the way up the vdev tree. 
 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	return (0);
}

static int
spa_ld_load_dedup_tables(spa_t *spa)
{
	int error = 0;
	vdev_t *rvd = spa->spa_root_vdev;

	error = ddt_load(spa);
	if (error != 0) {
		spa_load_failed(spa, "ddt_load failed [error=%d]", error);
		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
	}

	return (0);
}

static int
spa_ld_verify_logs(spa_t *spa, spa_import_type_t type, char **ereport)
{
	vdev_t *rvd = spa->spa_root_vdev;

	if (type != SPA_IMPORT_ASSEMBLE && spa_writeable(spa)) {
		boolean_t missing = spa_check_logs(spa);
		if (missing) {
			if (spa->spa_missing_tvds != 0) {
				spa_load_note(spa, "spa_check_logs failed "
				    "so dropping the logs");
			} else {
				*ereport = FM_EREPORT_ZFS_LOG_REPLAY;
				spa_load_failed(spa, "spa_check_logs failed");
				return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG,
				    ENXIO));
			}
		}
	}

	return (0);
}

static int
spa_ld_verify_pool_data(spa_t *spa)
{
	int error = 0;
	vdev_t *rvd = spa->spa_root_vdev;

	/*
	 * We've successfully opened the pool, verify that we're ready
	 * to start pushing transactions.
	 */
	if (spa->spa_load_state != SPA_LOAD_TRYIMPORT) {
		error = spa_load_verify(spa);
		if (error != 0) {
			spa_load_failed(spa, "spa_load_verify failed "
			    "[error=%d]", error);
			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
			    error));
		}
	}

	return (0);
}

static void
spa_ld_claim_log_blocks(spa_t *spa)
{
	dmu_tx_t *tx;
	dsl_pool_t *dp = spa_get_dsl(spa);

	/*
	 * Claim log blocks that haven't been committed yet.
	 * This must all happen in a single txg.
	 * Note: spa_claim_max_txg is updated by spa_claim_notify(),
	 * invoked from zil_claim_log_block()'s i/o done callback.
	 * Price of rollback is that we abandon the log.
	 */
	spa->spa_claiming = B_TRUE;

	tx = dmu_tx_create_assigned(dp, spa_first_txg(spa));
	(void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
	    zil_claim, tx, DS_FIND_CHILDREN);
	dmu_tx_commit(tx);

	spa->spa_claiming = B_FALSE;

	spa_set_log_state(spa, SPA_LOG_GOOD);
}

static void
spa_ld_check_for_config_update(spa_t *spa, uint64_t config_cache_txg,
    boolean_t update_config_cache)
{
	vdev_t *rvd = spa->spa_root_vdev;
	int need_update = B_FALSE;

	/*
	 * If the config cache is stale, or we have uninitialized
	 * metaslabs (see spa_vdev_add()), then update the config.
	 *
	 * If this is a verbatim import, trust the current
	 * in-core spa_config and update the disk labels.
	 */
	if (update_config_cache || config_cache_txg != spa->spa_config_txg ||
	    spa->spa_load_state == SPA_LOAD_IMPORT ||
	    spa->spa_load_state == SPA_LOAD_RECOVER ||
	    (spa->spa_import_flags & ZFS_IMPORT_VERBATIM))
		need_update = B_TRUE;

	for (int c = 0; c < rvd->vdev_children; c++)
		if (rvd->vdev_child[c]->vdev_ms_array == 0)
			need_update = B_TRUE;

	/*
	 * Update the config cache asynchronously in case we're the
	 * root pool, in which case the config cache isn't writable yet.
	 */
	if (need_update)
		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
}

static void
spa_ld_prepare_for_reload(spa_t *spa)
{
	int mode = spa->spa_mode;
	int async_suspended = spa->spa_async_suspended;

	spa_unload(spa);
	spa_deactivate(spa);
	spa_activate(spa, mode);

	/*
	 * We save the value of spa_async_suspended as it gets reset to 0 by
	 * spa_unload(). We want to restore it to the original value before
	 * returning as we might be calling spa_async_resume() later.
*/ spa->spa_async_suspended = async_suspended; } static int spa_ld_read_checkpoint_txg(spa_t *spa) { uberblock_t checkpoint; int error = 0; ASSERT0(spa->spa_checkpoint_txg); ASSERT(MUTEX_HELD(&spa_namespace_lock)); error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); if (error == ENOENT) return (0); if (error != 0) return (error); ASSERT3U(checkpoint.ub_txg, !=, 0); ASSERT3U(checkpoint.ub_checkpoint_txg, !=, 0); ASSERT3U(checkpoint.ub_timestamp, !=, 0); spa->spa_checkpoint_txg = checkpoint.ub_txg; spa->spa_checkpoint_info.sci_timestamp = checkpoint.ub_timestamp; return (0); } static int spa_ld_mos_init(spa_t *spa, spa_import_type_t type) { int error = 0; ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE); /* * Never trust the config that is provided unless we are assembling * a pool following a split. * This means don't trust blkptrs and the vdev tree in general. This * also effectively puts the spa in read-only mode since * spa_writeable() checks for spa_trust_config to be true. * We will later load a trusted config from the MOS. */ if (type != SPA_IMPORT_ASSEMBLE) spa->spa_trust_config = B_FALSE; /* * Parse the config provided to create a vdev tree. */ error = spa_ld_parse_config(spa, type); if (error != 0) return (error); /* * Now that we have the vdev tree, try to open each vdev. This involves * opening the underlying physical device, retrieving its geometry and * probing the vdev with a dummy I/O. The state of each vdev will be set * based on the success of those operations. After this we'll be ready * to read from the vdevs. */ error = spa_ld_open_vdevs(spa); if (error != 0) return (error); /* * Read the label of each vdev and make sure that the GUIDs stored * there match the GUIDs in the config provided. * If we're assembling a new pool that's been split off from an * existing pool, the labels haven't yet been updated so we skip * validation for now. */ if (type != SPA_IMPORT_ASSEMBLE) { error = spa_ld_validate_vdevs(spa); if (error != 0) return (error); } /* * Read all vdev labels to find the best uberblock (i.e. latest, * unless spa_load_max_txg is set) and store it in spa_uberblock. We * get the list of features required to read blkptrs in the MOS from * the vdev label with the best uberblock and verify that our version * of zfs supports them all. */ error = spa_ld_select_uberblock(spa, type); if (error != 0) return (error); /* * Pass that uberblock to the dsl_pool layer which will open the root * blkptr. This blkptr points to the latest version of the MOS and will * allow us to read its contents. 
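*
* (Aside on the ZAP encoding used by spa_ld_read_checkpoint_txg()
* above: the checkpointed uberblock is stored as an array of 8-byte
* integers, so the lookup passes integer size sizeof (uint64_t) and
* integer count sizeof (uberblock_t) / sizeof (uint64_t). As a worked
* example, a hypothetical 24-byte struct would be read back the same
* way as an array of three uint64_t words.)
*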
*/ error = spa_ld_open_rootbp(spa); if (error != 0) return (error); return (0); } static int spa_ld_checkpoint_rewind(spa_t *spa) { uberblock_t checkpoint; int error = 0; ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ZPOOL_CHECKPOINT, sizeof (uint64_t), sizeof (uberblock_t) / sizeof (uint64_t), &checkpoint); if (error != 0) { spa_load_failed(spa, "unable to retrieve checkpointed " "uberblock from the MOS config [error=%d]", error); if (error == ENOENT) error = ZFS_ERR_NO_CHECKPOINT; return (error); } ASSERT3U(checkpoint.ub_txg, <, spa->spa_uberblock.ub_txg); ASSERT3U(checkpoint.ub_txg, ==, checkpoint.ub_checkpoint_txg); /* * We need to update the txg and timestamp of the checkpointed * uberblock to be higher than the latest one. This ensures that * the checkpointed uberblock is selected if we were to close and * reopen the pool right after we've written it in the vdev labels. * (also see block comment in vdev_uberblock_compare) */ checkpoint.ub_txg = spa->spa_uberblock.ub_txg + 1; checkpoint.ub_timestamp = gethrestime_sec(); /* * Set current uberblock to be the checkpointed uberblock. */ spa->spa_uberblock = checkpoint; /* * If we are doing a normal rewind, then the pool is open for * writing and we sync the "updated" checkpointed uberblock to * disk. Once this is done, we've basically rewound the whole * pool and there is no way back. * * There are cases when we don't want to attempt and sync the * checkpointed uberblock to disk because we are opening a * pool as read-only. Specifically, verifying the checkpointed * state with zdb, and importing the checkpointed state to get * a "preview" of its content. */ if (spa_writeable(spa)) { vdev_t *rvd = spa->spa_root_vdev; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; int svdcount = 0; int children = rvd->vdev_children; int c0 = spa_get_random(children); for (int c = 0; c < children; c++) { vdev_t *vd = rvd->vdev_child[(c0 + c) % children]; /* Stop when revisiting the first vdev */ if (c > 0 && svd[0] == vd) break; if (vd->vdev_ms_array == 0 || vd->vdev_islog || !vdev_is_concrete(vd)) continue; svd[svdcount++] = vd; if (svdcount == SPA_SYNC_MIN_VDEVS) break; } error = vdev_config_sync(svd, svdcount, spa->spa_first_txg); if (error == 0) spa->spa_last_synced_guid = rvd->vdev_guid; spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) { spa_load_failed(spa, "failed to write checkpointed " "uberblock to the vdev labels [error=%d]", error); return (error); } } return (0); } static int spa_ld_mos_with_trusted_config(spa_t *spa, spa_import_type_t type, boolean_t *update_config_cache) { int error; /* * Parse the config for pool, open and validate vdevs, * select an uberblock, and use that uberblock to open * the MOS. */ error = spa_ld_mos_init(spa, type); if (error != 0) return (error); /* * Retrieve the trusted config stored in the MOS and use it to create * a new, exact version of the vdev tree, then reopen all vdevs. */ error = spa_ld_trusted_config(spa, type, B_FALSE); if (error == EAGAIN) { if (update_config_cache != NULL) *update_config_cache = B_TRUE; /* * Redo the loading process with the trusted config if it is * too different from the untrusted config. 
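*
* (The label-sync loop in spa_ld_checkpoint_rewind() above picks up to
* SPA_SYNC_MIN_VDEVS usable children, round-robin from a random
* starting index so that no child is systematically favored. The bare
* idiom, with hypothetical names:)
*
*	int start = rand() % nchildren;
*	int npicked = 0;
*
*	for (int c = 0; c < nchildren && npicked < want; c++) {
*		item_t *it = child[(start + c) % nchildren];
*		if (usable(it))
*			picked[npicked++] = it;
*	}
*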
 */
		spa_ld_prepare_for_reload(spa);
		spa_load_note(spa, "RELOADING");
		error = spa_ld_mos_init(spa, type);
		if (error != 0)
			return (error);

		error = spa_ld_trusted_config(spa, type, B_TRUE);
		if (error != 0)
			return (error);
	} else if (error != 0) {
		return (error);
	}

	return (0);
}

/*
 * Load an existing storage pool, using the config provided. This config
 * describes which vdevs are part of the pool and is later validated against
 * partial configs present in each vdev's label and an entire copy of the
 * config stored in the MOS.
 */
static int
spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
{
	int error = 0;
	boolean_t missing_feat_write = B_FALSE;
	boolean_t checkpoint_rewind =
	    (spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
	boolean_t update_config_cache = B_FALSE;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa->spa_config_source != SPA_CONFIG_SRC_NONE);

	spa_load_note(spa, "LOADING");

	error = spa_ld_mos_with_trusted_config(spa, type, &update_config_cache);
	if (error != 0)
		return (error);

	/*
	 * If we are rewinding to the checkpoint then we need to repeat
	 * everything we've done so far in this function but this time
	 * selecting the checkpointed uberblock and using that to open
	 * the MOS.
	 */
	if (checkpoint_rewind) {
		/*
		 * If we are rewinding to the checkpoint update config cache
		 * anyway.
		 */
		update_config_cache = B_TRUE;

		/*
		 * Extract the checkpointed uberblock from the current MOS
		 * and use this as the pool's uberblock from now on. If the
		 * pool is imported as writeable we also write the checkpoint
		 * uberblock to the labels, making the rewind permanent.
		 */
		error = spa_ld_checkpoint_rewind(spa);
		if (error != 0)
			return (error);

		/*
		 * Redo the loading process again with the
		 * checkpointed uberblock.
		 */
		spa_ld_prepare_for_reload(spa);
		spa_load_note(spa, "LOADING checkpointed uberblock");
		error = spa_ld_mos_with_trusted_config(spa, type, NULL);
		if (error != 0)
			return (error);
	}

	/*
	 * Retrieve the checkpoint txg if the pool has a checkpoint.
	 */
	error = spa_ld_read_checkpoint_txg(spa);
	if (error != 0)
		return (error);

	/*
	 * Retrieve the mapping of indirect vdevs. Those vdevs were removed
	 * from the pool and their contents were re-mapped to other vdevs. Note
	 * that everything that we read before this step must have been
	 * rewritten on concrete vdevs after the last device removal was
	 * initiated. Otherwise we could be reading from indirect vdevs before
	 * we have loaded their mappings.
	 */
	error = spa_ld_open_indirect_vdev_metadata(spa);
	if (error != 0)
		return (error);

	/*
	 * Retrieve the full list of active features from the MOS and check if
	 * they are all supported.
	 */
	error = spa_ld_check_features(spa, &missing_feat_write);
	if (error != 0)
		return (error);

	/*
	 * Load several special directories from the MOS needed by the dsl_pool
	 * layer.
	 */
	error = spa_ld_load_special_directories(spa);
	if (error != 0)
		return (error);

	/*
	 * Retrieve pool properties from the MOS.
	 */
	error = spa_ld_get_props(spa);
	if (error != 0)
		return (error);

	/*
	 * Retrieve the list of auxiliary devices - cache devices and spares -
	 * and open them.
	 */
	error = spa_ld_open_aux_vdevs(spa, type);
	if (error != 0)
		return (error);

	/*
	 * Load the metadata for all vdevs. Also check if unopenable devices
	 * should be autoreplaced.
	 */
	error = spa_ld_load_vdev_metadata(spa);
	if (error != 0)
		return (error);

	error = spa_ld_load_dedup_tables(spa);
	if (error != 0)
		return (error);

	/*
	 * Verify the logs now to make sure we don't have any unexpected errors
	 * when we claim log blocks later.
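*
* (spa_load_impl() above reads as a straight pipeline: each spa_ld_*()
* stage either succeeds or aborts the whole load with its error.
* Reduced to a skeleton, with hypothetical stage names:)
*
*	static int (*stages[])(spa_t *) = {
*		stage_open_mos, stage_check_features, stage_get_props,
*	};
*
*	for (size_t i = 0; i < sizeof (stages) / sizeof (stages[0]); i++) {
*		int error = stages[i](spa);
*		if (error != 0)
*			return (error);
*	}
*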
*/ error = spa_ld_verify_logs(spa, type, ereport); if (error != 0) return (error); if (missing_feat_write) { ASSERT(spa->spa_load_state == SPA_LOAD_TRYIMPORT); /* * At this point, we know that we can open the pool in * read-only mode but not read-write mode. We now have enough * information and can return to userland. */ return (spa_vdev_err(spa->spa_root_vdev, VDEV_AUX_UNSUP_FEAT, ENOTSUP)); } /* * Traverse the last txgs to make sure the pool was left off in a safe * state. When performing an extreme rewind, we verify the whole pool, * which can take a very long time. */ error = spa_ld_verify_pool_data(spa); if (error != 0) return (error); /* * Calculate the deflated space for the pool. This must be done before * we write anything to the pool because we'd need to update the space * accounting using the deflated sizes. */ spa_update_dspace(spa); /* * We have now retrieved all the information we needed to open the * pool. If we are importing the pool in read-write mode, a few * additional steps must be performed to finish the import. */ if (spa_writeable(spa) && (spa->spa_load_state == SPA_LOAD_RECOVER || spa->spa_load_max_txg == UINT64_MAX)) { uint64_t config_cache_txg = spa->spa_config_txg; ASSERT(spa->spa_load_state != SPA_LOAD_TRYIMPORT); /* * In case of a checkpoint rewind, log the original txg * of the checkpointed uberblock. */ if (checkpoint_rewind) { spa_history_log_internal(spa, "checkpoint rewind", NULL, "rewound state to txg=%llu", (u_longlong_t)spa->spa_uberblock.ub_checkpoint_txg); } /* * Traverse the ZIL and claim all blocks. */ spa_ld_claim_log_blocks(spa); /* * Kick-off the syncing thread. */ spa->spa_sync_on = B_TRUE; txg_sync_start(spa->spa_dsl_pool); /* * Wait for all claims to sync. We sync up to the highest * claimed log block birth time so that claimed log blocks * don't appear to be from the future. spa_claim_max_txg * will have been set for us by ZIL traversal operations * performed above. */ txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg); /* * Check if we need to request an update of the config. On the * next sync, we would update the config stored in vdev labels * and the cachefile (by default /etc/zfs/zpool.cache). */ spa_ld_check_for_config_update(spa, config_cache_txg, update_config_cache); /* * Check all DTLs to see if anything needs resilvering. */ if (!dsl_scan_resilvering(spa->spa_dsl_pool) && vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) spa_async_request(spa, SPA_ASYNC_RESILVER); /* * Log the fact that we booted up (so that we can detect if * we rebooted in the middle of an operation). */ spa_history_log_version(spa, "open"); /* * Delete any inconsistent datasets. */ (void) dmu_objset_find(spa_name(spa), dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN); /* * Clean up any stale temporary dataset userrefs. */ dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool); spa_restart_removal(spa); spa_spawn_aux_threads(spa); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); vdev_initialize_restart(spa->spa_root_vdev); spa_config_exit(spa, SCL_CONFIG, FTAG); } spa_load_note(spa, "LOADED"); return (0); } static int spa_load_retry(spa_t *spa, spa_load_state_t state) { int mode = spa->spa_mode; spa_unload(spa); spa_deactivate(spa); spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1; spa_activate(spa, mode); spa_async_suspend(spa); spa_load_note(spa, "spa_load_retry: rewind, max txg: %llu", (u_longlong_t)spa->spa_load_max_txg); return (spa_load(spa, state, SPA_IMPORT_EXISTING)); } /* * If spa_load() fails this function will try loading prior txg's. 
If * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this * function will not rewind the pool and will return the same error as * spa_load(). */ static int spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request, int rewind_flags) { nvlist_t *loadinfo = NULL; nvlist_t *config = NULL; int load_error, rewind_error; uint64_t safe_rewind_txg; uint64_t min_txg; if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) { spa->spa_load_max_txg = spa->spa_load_txg; spa_set_log_state(spa, SPA_LOG_CLEAR); } else { spa->spa_load_max_txg = max_request; if (max_request != UINT64_MAX) spa->spa_extreme_rewind = B_TRUE; } load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING); if (load_error == 0) return (0); if (load_error == ZFS_ERR_NO_CHECKPOINT) { /* * When attempting checkpoint-rewind on a pool with no * checkpoint, we should not attempt to load uberblocks * from previous txgs when spa_load fails. */ ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT); return (load_error); } if (spa->spa_root_vdev != NULL) config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg; spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp; if (rewind_flags & ZPOOL_NEVER_REWIND) { nvlist_free(config); return (load_error); } if (state == SPA_LOAD_RECOVER) { /* Price of rolling back is discarding txgs, including log */ spa_set_log_state(spa, SPA_LOG_CLEAR); } else { /* * If we aren't rolling back save the load info from our first * import attempt so that we can restore it after attempting * to rewind. */ loadinfo = spa->spa_load_info; spa->spa_load_info = fnvlist_alloc(); } spa->spa_load_max_txg = spa->spa_last_ubsync_txg; safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE; min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ? TXG_INITIAL : safe_rewind_txg; /* * Continue as long as we're finding errors, we're still within * the acceptable rewind range, and we're still finding uberblocks */ while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg && spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) { if (spa->spa_load_max_txg < safe_rewind_txg) spa->spa_extreme_rewind = B_TRUE; rewind_error = spa_load_retry(spa, state); } spa->spa_extreme_rewind = B_FALSE; spa->spa_load_max_txg = UINT64_MAX; if (config && (rewind_error || state != SPA_LOAD_RECOVER)) spa_config_set(spa, config); else nvlist_free(config); if (state == SPA_LOAD_RECOVER) { ASSERT3P(loadinfo, ==, NULL); return (rewind_error); } else { /* Store the rewind info as part of the initial load info */ fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO, spa->spa_load_info); /* Restore the initial load info */ fnvlist_free(spa->spa_load_info); spa->spa_load_info = loadinfo; return (load_error); } } /* * Pool Open/Import * * The import case is identical to an open except that the configuration is sent * down from userland, instead of grabbed from the configuration cache. For the * case of an open, the pool configuration will exist in the * POOL_STATE_UNINITIALIZED state. * * The stats information (gen/count/ustats) is used to gather vdev statistics at * the same time open the pool, without having to keep around the spa_t in some * ambiguous state. 
*/ static int spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, nvlist_t **config) { spa_t *spa; spa_load_state_t state = SPA_LOAD_OPEN; int error; int locked = B_FALSE; int firstopen = B_FALSE; *spapp = NULL; /* * As disgusting as this is, we need to support recursive calls to this * function because dsl_dir_open() is called during spa_load(), and ends * up calling spa_open() again. The real fix is to figure out how to * avoid dsl_dir_open() calling this in the first place. */ if (mutex_owner(&spa_namespace_lock) != curthread) { mutex_enter(&spa_namespace_lock); locked = B_TRUE; } if ((spa = spa_lookup(pool)) == NULL) { if (locked) mutex_exit(&spa_namespace_lock); return (SET_ERROR(ENOENT)); } if (spa->spa_state == POOL_STATE_UNINITIALIZED) { zpool_load_policy_t policy; firstopen = B_TRUE; zpool_get_load_policy(nvpolicy ? nvpolicy : spa->spa_config, &policy); if (policy.zlp_rewind & ZPOOL_DO_REWIND) state = SPA_LOAD_RECOVER; spa_activate(spa, spa_mode_global); if (state != SPA_LOAD_RECOVER) spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE; zfs_dbgmsg("spa_open_common: opening %s", pool); error = spa_load_best(spa, state, policy.zlp_txg, policy.zlp_rewind); if (error == EBADF) { /* * If vdev_validate() returns failure (indicated by * EBADF), it indicates that one of the vdevs indicates * that the pool has been exported or destroyed. If * this is the case, the config cache is out of sync and * we should remove the pool from the namespace. */ spa_unload(spa); spa_deactivate(spa); spa_write_cachefile(spa, B_TRUE, B_TRUE); spa_remove(spa); if (locked) mutex_exit(&spa_namespace_lock); return (SET_ERROR(ENOENT)); } if (error) { /* * We can't open the pool, but we still have useful * information: the state of each vdev after the * attempted vdev_open(). Return this to the user. */ if (config != NULL && spa->spa_config) { VERIFY(nvlist_dup(spa->spa_config, config, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); } spa_unload(spa); spa_deactivate(spa); spa->spa_last_open_failed = error; if (locked) mutex_exit(&spa_namespace_lock); *spapp = NULL; return (error); } } spa_open_ref(spa, tag); if (config != NULL) *config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); /* * If we've recovered the pool, pass back any information we * gathered while doing the load. */ if (state == SPA_LOAD_RECOVER) { VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); } if (locked) { spa->spa_last_open_failed = 0; spa->spa_last_ubsync_txg = 0; spa->spa_load_txg = 0; mutex_exit(&spa_namespace_lock); #ifdef __FreeBSD__ #ifdef _KERNEL if (firstopen) zvol_create_minors(spa->spa_name); #endif #endif } *spapp = spa; return (0); } int spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy, nvlist_t **config) { return (spa_open_common(name, spapp, tag, policy, config)); } int spa_open(const char *name, spa_t **spapp, void *tag) { return (spa_open_common(name, spapp, tag, NULL, NULL)); } /* * Lookup the given spa_t, incrementing the inject count in the process, * preventing it from being exported or destroyed. 
*/ spa_t * spa_inject_addref(char *name) { spa_t *spa; mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(name)) == NULL) { mutex_exit(&spa_namespace_lock); return (NULL); } spa->spa_inject_ref++; mutex_exit(&spa_namespace_lock); return (spa); } void spa_inject_delref(spa_t *spa) { mutex_enter(&spa_namespace_lock); spa->spa_inject_ref--; mutex_exit(&spa_namespace_lock); } /* * Add spares device information to the nvlist. */ static void spa_add_spares(spa_t *spa, nvlist_t *config) { nvlist_t **spares; uint_t i, nspares; nvlist_t *nvroot; uint64_t guid; vdev_stat_t *vs; uint_t vsc; uint64_t pool; ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); if (spa->spa_spares.sav_count == 0) return; VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); if (nspares != 0) { VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0); /* * Go through and find any spares which have since been * repurposed as an active spare. If this is the case, update * their status appropriately. */ for (i = 0; i < nspares; i++) { VERIFY(nvlist_lookup_uint64(spares[i], ZPOOL_CONFIG_GUID, &guid) == 0); if (spa_spare_exists(guid, &pool, NULL) && pool != 0ULL) { VERIFY(nvlist_lookup_uint64_array( spares[i], ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); vs->vs_state = VDEV_STATE_CANT_OPEN; vs->vs_aux = VDEV_AUX_SPARED; } } } } /* * Add l2cache device information to the nvlist, including vdev stats. */ static void spa_add_l2cache(spa_t *spa, nvlist_t *config) { nvlist_t **l2cache; uint_t i, j, nl2cache; nvlist_t *nvroot; uint64_t guid; vdev_t *vd; vdev_stat_t *vs; uint_t vsc; ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); if (spa->spa_l2cache.sav_count == 0) return; VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); if (nl2cache != 0) { VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); /* * Update level 2 cache device stats. */ for (i = 0; i < nl2cache; i++) { VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID, &guid) == 0); vd = NULL; for (j = 0; j < spa->spa_l2cache.sav_count; j++) { if (guid == spa->spa_l2cache.sav_vdevs[j]->vdev_guid) { vd = spa->spa_l2cache.sav_vdevs[j]; break; } } ASSERT(vd != NULL); VERIFY(nvlist_lookup_uint64_array(l2cache[i], ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); vdev_get_stats(vd, vs); } } } static void -spa_add_feature_stats(spa_t *spa, nvlist_t *config) +spa_feature_stats_from_disk(spa_t *spa, nvlist_t *features) { - nvlist_t *features; zap_cursor_t zc; zap_attribute_t za; - ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); - VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0); - /* We may be unable to read features if pool is suspended. 
*/ if (spa_suspended(spa)) - goto out; + return; if (spa->spa_feat_for_read_obj != 0) { for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_feat_for_read_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { ASSERT(za.za_integer_length == sizeof (uint64_t) && za.za_num_integers == 1); - VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, + VERIFY0(nvlist_add_uint64(features, za.za_name, za.za_first_integer)); } zap_cursor_fini(&zc); } if (spa->spa_feat_for_write_obj != 0) { for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_feat_for_write_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { ASSERT(za.za_integer_length == sizeof (uint64_t) && za.za_num_integers == 1); - VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name, + VERIFY0(nvlist_add_uint64(features, za.za_name, za.za_first_integer)); } zap_cursor_fini(&zc); } +} -out: - VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, - features) == 0); - nvlist_free(features); +static void +spa_feature_stats_from_cache(spa_t *spa, nvlist_t *features) +{ + int i; + + for (i = 0; i < SPA_FEATURES; i++) { + zfeature_info_t feature = spa_feature_table[i]; + uint64_t refcount; + + if (feature_get_refcount(spa, &feature, &refcount) != 0) + continue; + + VERIFY0(nvlist_add_uint64(features, feature.fi_guid, refcount)); + } +} + +/* + * Store a list of pool features and their reference counts in the + * config. + * + * The first time this is called on a spa, allocate a new nvlist, fetch + * the pool features and reference counts from disk, then save the list + * in the spa. In subsequent calls on the same spa use the saved nvlist + * and refresh its values from the cached reference counts. This + * ensures we don't block here on I/O on a suspended pool so 'zpool + * clear' can resume the pool. + */ +static void +spa_add_feature_stats(spa_t *spa, nvlist_t *config) +{ + nvlist_t *features; + + ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER)); + + mutex_enter(&spa->spa_feat_stats_lock); + features = spa->spa_feat_stats; + + if (features != NULL) { + spa_feature_stats_from_cache(spa, features); + } else { + VERIFY0(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP)); + spa->spa_feat_stats = features; + spa_feature_stats_from_disk(spa, features); + } + + VERIFY0(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, + features)); + + mutex_exit(&spa->spa_feat_stats_lock); } int spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen) { int error; spa_t *spa; *config = NULL; error = spa_open_common(name, &spa, FTAG, NULL, config); if (spa != NULL) { /* * This still leaves a window of inconsistency where the spares * or l2cache devices could change and the config would be * self-inconsistent. */ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); if (*config != NULL) { uint64_t loadtimes[2]; loadtimes[0] = spa->spa_loaded_ts.tv_sec; loadtimes[1] = spa->spa_loaded_ts.tv_nsec; VERIFY(nvlist_add_uint64_array(*config, ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0); VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_ERRCOUNT, spa_get_errlog_size(spa)) == 0); if (spa_suspended(spa)) VERIFY(nvlist_add_uint64(*config, ZPOOL_CONFIG_SUSPENDED, spa->spa_failmode) == 0); spa_add_spares(spa, *config); spa_add_l2cache(spa, *config); spa_add_feature_stats(spa, *config); } } /* * We want to get the alternate root even for faulted pools, so we cheat * and call spa_lookup() directly. 
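 * (spa_open_common() fails outright for pools that cannot be loaded, but
 * the spa_t may still exist in the namespace, so a direct lookup lets us
 * report the altroot anyway.)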
*/ if (altroot) { if (spa == NULL) { mutex_enter(&spa_namespace_lock); spa = spa_lookup(name); if (spa) spa_altroot(spa, altroot, buflen); else altroot[0] = '\0'; spa = NULL; mutex_exit(&spa_namespace_lock); } else { spa_altroot(spa, altroot, buflen); } } if (spa != NULL) { spa_config_exit(spa, SCL_CONFIG, FTAG); spa_close(spa, FTAG); } return (error); } /* * Validate that the auxiliary device array is well formed. We must have an * array of nvlists, each of which describes a valid leaf vdev. If this is an * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be * specified, as long as they are well-formed. */ static int spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode, spa_aux_vdev_t *sav, const char *config, uint64_t version, vdev_labeltype_t label) { nvlist_t **dev; uint_t i, ndev; vdev_t *vd; int error; ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); /* * It's acceptable to have no devs specified. */ if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0) return (0); if (ndev == 0) return (SET_ERROR(EINVAL)); /* * Make sure the pool is formatted with a version that supports this * device type. */ if (spa_version(spa) < version) return (SET_ERROR(ENOTSUP)); /* * Set the pending device list so we correctly handle device in-use * checking. */ sav->sav_pending = dev; sav->sav_npending = ndev; for (i = 0; i < ndev; i++) { if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0, mode)) != 0) goto out; if (!vd->vdev_ops->vdev_op_leaf) { vdev_free(vd); error = SET_ERROR(EINVAL); goto out; } /* * The L2ARC currently only supports disk devices in * kernel context. For user-level testing, we allow it. */ #ifdef _KERNEL if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) && strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) { error = SET_ERROR(ENOTBLK); vdev_free(vd); goto out; } #endif vd->vdev_top = vd; if ((error = vdev_open(vd)) == 0 && (error = vdev_label_init(vd, crtxg, label)) == 0) { VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); } vdev_free(vd); if (error && (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE)) goto out; else error = 0; } out: sav->sav_pending = NULL; sav->sav_npending = 0; return (error); } static int spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode) { int error; ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode, &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES, VDEV_LABEL_SPARE)) != 0) { return (error); } return (spa_validate_aux_devs(spa, nvroot, crtxg, mode, &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE, VDEV_LABEL_L2CACHE)); } static void spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, const char *config) { int i; if (sav->sav_config != NULL) { nvlist_t **olddevs; uint_t oldndevs; nvlist_t **newdevs; /* * Generate new dev list by concatenating with the * current dev list.
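 * The combined array is built from duplicates of both lists; the
 * temporary copies are freed again once they have been added to
 * sav_config.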
*/ VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config, &olddevs, &oldndevs) == 0); newdevs = kmem_alloc(sizeof (void *) * (ndevs + oldndevs), KM_SLEEP); for (i = 0; i < oldndevs; i++) VERIFY(nvlist_dup(olddevs[i], &newdevs[i], KM_SLEEP) == 0); for (i = 0; i < ndevs; i++) VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], KM_SLEEP) == 0); VERIFY(nvlist_remove(sav->sav_config, config, DATA_TYPE_NVLIST_ARRAY) == 0); VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, newdevs, ndevs + oldndevs) == 0); for (i = 0; i < oldndevs + ndevs; i++) nvlist_free(newdevs[i]); kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *)); } else { /* * Generate a new dev list. */ VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, devs, ndevs) == 0); } } /* * Stop and drop level 2 ARC devices */ void spa_l2cache_drop(spa_t *spa) { vdev_t *vd; int i; spa_aux_vdev_t *sav = &spa->spa_l2cache; for (i = 0; i < sav->sav_count; i++) { uint64_t pool; vd = sav->sav_vdevs[i]; ASSERT(vd != NULL); if (spa_l2cache_exists(vd->vdev_guid, &pool) && pool != 0ULL && l2arc_vdev_present(vd)) l2arc_remove_vdev(vd); } } /* * Pool Creation */ int spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, nvlist_t *zplprops) { spa_t *spa; char *altroot = NULL; vdev_t *rvd; dsl_pool_t *dp; dmu_tx_t *tx; int error = 0; uint64_t txg = TXG_INITIAL; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; uint64_t version, obj; boolean_t has_features; char *poolname; nvlist_t *nvl; if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_TNAME), &poolname) != 0) poolname = (char *)pool; /* * If this pool already exists, return failure. */ mutex_enter(&spa_namespace_lock); if (spa_lookup(poolname) != NULL) { mutex_exit(&spa_namespace_lock); return (SET_ERROR(EEXIST)); } /* * Allocate a new spa_t structure. */ nvl = fnvlist_alloc(); fnvlist_add_string(nvl, ZPOOL_CONFIG_POOL_NAME, pool); (void) nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); spa = spa_add(poolname, nvl, altroot); fnvlist_free(nvl); spa_activate(spa, spa_mode_global); if (props && (error = spa_prop_validate(spa, props))) { spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (error); } /* * Temporary pool names should never be written to disk. */ if (poolname != pool) spa->spa_import_flags |= ZFS_IMPORT_TEMP_NAME; has_features = B_FALSE; for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); elem != NULL; elem = nvlist_next_nvpair(props, elem)) { if (zpool_prop_feature(nvpair_name(elem))) has_features = B_TRUE; } if (has_features || nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) { version = SPA_VERSION; } ASSERT(SPA_VERSION_IS_SUPPORTED(version)); spa->spa_first_txg = txg; spa->spa_uberblock.ub_txg = txg - 1; spa->spa_uberblock.ub_version = version; spa->spa_ubsync = spa->spa_uberblock; spa->spa_load_state = SPA_LOAD_CREATE; spa->spa_removing_phys.sr_state = DSS_NONE; spa->spa_removing_phys.sr_removing_vdev = -1; spa->spa_removing_phys.sr_prev_indirect_vdev = -1; /* * Create "The Godfather" zio to hold all async IOs */ spa->spa_async_zio_root = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); for (int i = 0; i < max_ncpus; i++) { spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); } /* * Create the root vdev. 
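 * (This, and the label initialization below, runs with all spa config
 * locks held as writer; see the surrounding spa_config_enter/exit pair.)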
*/ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD); ASSERT(error != 0 || rvd != NULL); ASSERT(error != 0 || spa->spa_root_vdev == rvd); if (error == 0 && !zfs_allocatable_devs(nvroot)) error = SET_ERROR(EINVAL); if (error == 0 && (error = vdev_create(rvd, txg, B_FALSE)) == 0 && (error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) == 0) { for (int c = 0; c < rvd->vdev_children; c++) { vdev_ashift_optimize(rvd->vdev_child[c]); vdev_metaslab_set_size(rvd->vdev_child[c]); vdev_expand(rvd->vdev_child[c], txg); } } spa_config_exit(spa, SCL_ALL, FTAG); if (error != 0) { spa_unload(spa); spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (error); } /* * Get the list of spares, if specified. */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_spares.sav_sync = B_TRUE; } /* * Get the list of level 2 cache devices, if specified. */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_l2cache(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_l2cache.sav_sync = B_TRUE; } spa->spa_is_initializing = B_TRUE; spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg); spa->spa_meta_objset = dp->dp_meta_objset; spa->spa_is_initializing = B_FALSE; /* * Create DDTs (dedup tables). */ ddt_create(spa); spa_update_dspace(spa); tx = dmu_tx_create_assigned(dp, txg); /* * Create the pool config object. */ spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset, DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE, DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG, sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) { cmn_err(CE_PANIC, "failed to add pool config"); } if (spa_version(spa) >= SPA_VERSION_FEATURES) spa_feature_create_zap_objects(spa, tx); if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION, sizeof (uint64_t), 1, &version, tx) != 0) { cmn_err(CE_PANIC, "failed to add pool version"); } /* Newly created pools with the right version are always deflated. */ if (version >= SPA_VERSION_RAIDZ_DEFLATE) { spa->spa_deflate = TRUE; if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) { cmn_err(CE_PANIC, "failed to add deflate"); } } /* * Create the deferred-free bpobj. Turn off compression * because sync-to-convergence takes longer if the blocksize * keeps changing. */ obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx); dmu_object_set_compress(spa->spa_meta_objset, obj, ZIO_COMPRESS_OFF, tx); if (zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ, sizeof (uint64_t), 1, &obj, tx) != 0) { cmn_err(CE_PANIC, "failed to add bpobj"); } VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj)); /* * Create the pool's history object. 
*/ if (version >= SPA_VERSION_ZPOOL_HISTORY) spa_history_create_obj(spa, tx); /* * Generate some random noise for salted checksums to operate on. */ (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes, sizeof (spa->spa_cksum_salt.zcs_bytes)); /* * Set pool properties. */ spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS); spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION); spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE); spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND); if (props != NULL) { spa_configfile_set(spa, props, B_FALSE); spa_sync_props(props, tx); } dmu_tx_commit(tx); spa->spa_sync_on = B_TRUE; txg_sync_start(spa->spa_dsl_pool); /* * We explicitly wait for the first transaction to complete so that our * bean counters are appropriately updated. */ txg_wait_synced(spa->spa_dsl_pool, txg); spa_spawn_aux_threads(spa); spa_write_cachefile(spa, B_FALSE, B_TRUE); spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_CREATE); spa_history_log_version(spa, "create"); /* * Don't count references from objsets that are already closed * and are making their way through the eviction process. */ spa_evicting_os_wait(spa); spa->spa_minref = refcount_count(&spa->spa_refcount); spa->spa_load_state = SPA_LOAD_NONE; mutex_exit(&spa_namespace_lock); return (0); } #ifdef _KERNEL #ifdef illumos /* * Get the root pool information from the root disk, then import the root pool * during the system boot up time. */ extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); static nvlist_t * spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) { nvlist_t *config; nvlist_t *nvtop, *nvroot; uint64_t pgid; if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0) return (NULL); /* * Add this top-level vdev to the child array. */ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pgid) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0); /* * Put this pool's top-level vdevs into a root vdev. */ VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &nvtop, 1) == 0); /* * Replace the existing vdev_tree with the new root vdev in * this pool's configuration (remove the old, add the new). */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); return (config); } /* * Walk the vdev tree and see if we can find a device with "better" * configuration. A configuration is "better" if the label on that * device has a more recent txg. */ static void spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg) { for (int c = 0; c < vd->vdev_children; c++) spa_alt_rootvdev(vd->vdev_child[c], avd, txg); if (vd->vdev_ops->vdev_op_leaf) { nvlist_t *label; uint64_t label_txg; if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid, &label) != 0) return; VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG, &label_txg) == 0); /* * Do we have a better boot device? */ if (label_txg > *txg) { *txg = label_txg; *avd = vd; } nvlist_free(label); } } /* * Import a root pool. * * For x86. devpath_list will consist of devid and/or physpath name of * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a"). 
* The GRUB "findroot" command will return the vdev we should boot. * * For Sparc, devpath_list consists the physpath name of the booting device * no matter the rootpool is a single device pool or a mirrored pool. * e.g. * "/pci@1f,0/ide@d/disk@0,0:a" */ int spa_import_rootpool(char *devpath, char *devid) { spa_t *spa; vdev_t *rvd, *bvd, *avd = NULL; nvlist_t *config, *nvtop; uint64_t guid, txg; char *pname; int error; /* * Read the label from the boot device and generate a configuration. */ config = spa_generate_rootconf(devpath, devid, &guid); #if defined(_OBP) && defined(_KERNEL) if (config == NULL) { if (strstr(devpath, "/iscsi/ssd") != NULL) { /* iscsi boot */ get_iscsi_bootpath_phy(devpath); config = spa_generate_rootconf(devpath, devid, &guid); } } #endif if (config == NULL) { cmn_err(CE_NOTE, "Cannot read the pool label from '%s'", devpath); return (SET_ERROR(EIO)); } VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(pname)) != NULL) { /* * Remove the existing root pool from the namespace so that we * can replace it with the correct config we just read in. */ spa_remove(spa); } spa = spa_add(pname, config, NULL); spa->spa_is_root = B_TRUE; spa->spa_import_flags = ZFS_IMPORT_VERBATIM; if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &spa->spa_ubsync.ub_version) != 0) spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; /* * Build up a vdev tree based on the boot device's label config. */ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, VDEV_ALLOC_ROOTPOOL); spa_config_exit(spa, SCL_ALL, FTAG); if (error) { mutex_exit(&spa_namespace_lock); nvlist_free(config); cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", pname); return (error); } /* * Get the boot vdev. */ if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) { cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu", (u_longlong_t)guid); error = SET_ERROR(ENOENT); goto out; } /* * Determine if there is a better boot device. */ avd = bvd; spa_alt_rootvdev(rvd, &avd, &txg); if (avd != bvd) { cmn_err(CE_NOTE, "The boot device is 'degraded'. Please " "try booting from '%s'", avd->vdev_path); error = SET_ERROR(EINVAL); goto out; } /* * If the boot device is part of a spare vdev then ensure that * we're booting off the active spare. */ if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops && !bvd->vdev_isspare) { cmn_err(CE_NOTE, "The boot device is currently spared. 
Please " "try booting from '%s'", bvd->vdev_parent-> vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path); error = SET_ERROR(EINVAL); goto out; } error = 0; out: spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); spa_config_exit(spa, SCL_ALL, FTAG); mutex_exit(&spa_namespace_lock); nvlist_free(config); return (error); } #else /* !illumos */ extern int vdev_geom_read_pool_label(const char *name, nvlist_t ***configs, uint64_t *count); static nvlist_t * spa_generate_rootconf(const char *name) { nvlist_t **configs, **tops; nvlist_t *config; nvlist_t *best_cfg, *nvtop, *nvroot; uint64_t *holes; uint64_t best_txg; uint64_t nchildren; uint64_t pgid; uint64_t count; uint64_t i; uint_t nholes; if (vdev_geom_read_pool_label(name, &configs, &count) != 0) return (NULL); ASSERT3U(count, !=, 0); best_txg = 0; for (i = 0; i < count; i++) { uint64_t txg; VERIFY(nvlist_lookup_uint64(configs[i], ZPOOL_CONFIG_POOL_TXG, &txg) == 0); if (txg > best_txg) { best_txg = txg; best_cfg = configs[i]; } } nchildren = 1; nvlist_lookup_uint64(best_cfg, ZPOOL_CONFIG_VDEV_CHILDREN, &nchildren); holes = NULL; nvlist_lookup_uint64_array(best_cfg, ZPOOL_CONFIG_HOLE_ARRAY, &holes, &nholes); tops = kmem_zalloc(nchildren * sizeof(void *), KM_SLEEP); for (i = 0; i < nchildren; i++) { if (i >= count) break; if (configs[i] == NULL) continue; VERIFY(nvlist_lookup_nvlist(configs[i], ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); nvlist_dup(nvtop, &tops[i], KM_SLEEP); } for (i = 0; holes != NULL && i < nholes; i++) { if (i >= nchildren) continue; if (tops[holes[i]] != NULL) continue; nvlist_alloc(&tops[holes[i]], NV_UNIQUE_NAME, KM_SLEEP); VERIFY(nvlist_add_string(tops[holes[i]], ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_ID, holes[i]) == 0); VERIFY(nvlist_add_uint64(tops[holes[i]], ZPOOL_CONFIG_GUID, 0) == 0); } for (i = 0; i < nchildren; i++) { if (tops[i] != NULL) continue; nvlist_alloc(&tops[i], NV_UNIQUE_NAME, KM_SLEEP); VERIFY(nvlist_add_string(tops[i], ZPOOL_CONFIG_TYPE, VDEV_TYPE_MISSING) == 0); VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_ID, i) == 0); VERIFY(nvlist_add_uint64(tops[i], ZPOOL_CONFIG_GUID, 0) == 0); } /* * Create pool config based on the best vdev config. */ nvlist_dup(best_cfg, &config, KM_SLEEP); /* * Put this pool's top-level vdevs into a root vdev. */ VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pgid) == 0); VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0); VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, tops, nchildren) == 0); /* * Replace the existing vdev_tree with the new root vdev in * this pool's configuration (remove the old, add the new). */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); /* * Drop vdev config elements that should not be present at pool level. 
*/ nvlist_remove(config, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64); nvlist_remove(config, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64); for (i = 0; i < count; i++) nvlist_free(configs[i]); kmem_free(configs, count * sizeof(void *)); for (i = 0; i < nchildren; i++) nvlist_free(tops[i]); kmem_free(tops, nchildren * sizeof(void *)); nvlist_free(nvroot); return (config); } int spa_import_rootpool(const char *name) { spa_t *spa; vdev_t *rvd, *bvd, *avd = NULL; nvlist_t *config, *nvtop; uint64_t txg; char *pname; int error; /* * Read the label from the boot device and generate a configuration. */ config = spa_generate_rootconf(name); mutex_enter(&spa_namespace_lock); if (config != NULL) { VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) == 0 && strcmp(name, pname) == 0); VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0); if ((spa = spa_lookup(pname)) != NULL) { /* * The pool could already be imported, * e.g., after reboot -r. */ if (spa->spa_state == POOL_STATE_ACTIVE) { mutex_exit(&spa_namespace_lock); nvlist_free(config); return (0); } /* * Remove the existing root pool from the namespace so * that we can replace it with the correct config * we just read in. */ spa_remove(spa); } spa = spa_add(pname, config, NULL); /* * Set spa_ubsync.ub_version as it can be used in vdev_alloc() * via spa_version(). */ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &spa->spa_ubsync.ub_version) != 0) spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL; } else if ((spa = spa_lookup(name)) == NULL) { mutex_exit(&spa_namespace_lock); nvlist_free(config); cmn_err(CE_NOTE, "Cannot find the pool label for '%s'", name); return (EIO); } else { VERIFY(nvlist_dup(spa->spa_config, &config, KM_SLEEP) == 0); } spa->spa_is_root = B_TRUE; spa->spa_import_flags = ZFS_IMPORT_VERBATIM; /* * Build up a vdev tree based on the boot device's label config. */ VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); error = spa_config_parse(spa, &rvd, nvtop, NULL, 0, VDEV_ALLOC_ROOTPOOL); spa_config_exit(spa, SCL_ALL, FTAG); if (error) { mutex_exit(&spa_namespace_lock); nvlist_free(config); cmn_err(CE_NOTE, "Can not parse the config for pool '%s'", pname); return (error); } spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); vdev_free(rvd); spa_config_exit(spa, SCL_ALL, FTAG); mutex_exit(&spa_namespace_lock); nvlist_free(config); return (0); } #endif /* illumos */ #endif /* _KERNEL */ /* * Import a non-root pool into the system. */ int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) { spa_t *spa; char *altroot = NULL; spa_load_state_t state = SPA_LOAD_IMPORT; zpool_load_policy_t policy; uint64_t mode = spa_mode_global; uint64_t readonly = B_FALSE; int error; nvlist_t *nvroot; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; /* * If a pool with this name exists, return failure. */ mutex_enter(&spa_namespace_lock); if (spa_lookup(pool) != NULL) { mutex_exit(&spa_namespace_lock); return (SET_ERROR(EEXIST)); } /* * Create and initialize the spa structure. */ (void) nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); (void) nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly); if (readonly) mode = FREAD; spa = spa_add(pool, config, altroot); spa->spa_import_flags = flags; /* * Verbatim import - Take a pool and insert it into the namespace * as if it had been loaded at boot. 
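 * No load or vdev validation is performed in this case; the supplied
 * config is written to the cachefile and an import event is posted
 * as-is.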
*/ if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) { if (props != NULL) spa_configfile_set(spa, props, B_FALSE); spa_write_cachefile(spa, B_FALSE, B_TRUE); spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); zfs_dbgmsg("spa_import: verbatim import of %s", pool); mutex_exit(&spa_namespace_lock); return (0); } spa_activate(spa, mode); /* * Don't start async tasks until we know everything is healthy. */ spa_async_suspend(spa); zpool_get_load_policy(config, &policy); if (policy.zlp_rewind & ZPOOL_DO_REWIND) state = SPA_LOAD_RECOVER; spa->spa_config_source = SPA_CONFIG_SRC_TRYIMPORT; if (state != SPA_LOAD_RECOVER) { spa->spa_last_ubsync_txg = spa->spa_load_txg = 0; zfs_dbgmsg("spa_import: importing %s", pool); } else { zfs_dbgmsg("spa_import: importing %s, max_txg=%lld " "(RECOVERY MODE)", pool, (longlong_t)policy.zlp_txg); } error = spa_load_best(spa, state, policy.zlp_txg, policy.zlp_rewind); /* * Propagate anything learned while loading the pool and pass it * back to caller (i.e. rewind info, missing devices, etc). */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); /* * Toss any existing sparelist, as it doesn't have any validity * anymore, and conflicts with spa_has_spare(). */ if (spa->spa_spares.sav_config) { nvlist_free(spa->spa_spares.sav_config); spa->spa_spares.sav_config = NULL; spa_load_spares(spa); } if (spa->spa_l2cache.sav_config) { nvlist_free(spa->spa_l2cache.sav_config); spa->spa_l2cache.sav_config = NULL; spa_load_l2cache(spa); } VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); if (error == 0) error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_SPARE); if (error == 0) error = spa_validate_aux(spa, nvroot, -1ULL, VDEV_ALLOC_L2CACHE); spa_config_exit(spa, SCL_ALL, FTAG); if (props != NULL) spa_configfile_set(spa, props, B_FALSE); if (error != 0 || (props && spa_writeable(spa) && (error = spa_prop_set(spa, props)))) { spa_unload(spa); spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (error); } spa_async_resume(spa); /* * Override any spares and level 2 cache devices as specified by * the user, as these may have correct device names/devids, etc. */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { if (spa->spa_spares.sav_config) VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); else VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_spares(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_spares.sav_sync = B_TRUE; } if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { if (spa->spa_l2cache.sav_config) VERIFY(nvlist_remove(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); else VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa_load_l2cache(spa); spa_config_exit(spa, SCL_ALL, FTAG); spa->spa_l2cache.sav_sync = B_TRUE; } /* * Check for any removed devices. 
*/ if (spa->spa_autoreplace) { spa_aux_check_removed(&spa->spa_spares); spa_aux_check_removed(&spa->spa_l2cache); } if (spa_writeable(spa)) { /* * Update the config cache to include the newly-imported pool. */ spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); } /* * It's possible that the pool was expanded while it was exported. * We kick off an async task to handle this for us. */ spa_async_request(spa, SPA_ASYNC_AUTOEXPAND); spa_history_log_version(spa, "import"); spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT); mutex_exit(&spa_namespace_lock); #ifdef __FreeBSD__ #ifdef _KERNEL zvol_create_minors(pool); #endif #endif return (0); } nvlist_t * spa_tryimport(nvlist_t *tryconfig) { nvlist_t *config = NULL; char *poolname, *cachefile; spa_t *spa; uint64_t state; int error; zpool_load_policy_t policy; if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname)) return (NULL); if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state)) return (NULL); /* * Create and initialize the spa structure. */ mutex_enter(&spa_namespace_lock); spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL); spa_activate(spa, FREAD); /* * Rewind pool if a max txg was provided. */ zpool_get_load_policy(spa->spa_config, &policy); if (policy.zlp_txg != UINT64_MAX) { spa->spa_load_max_txg = policy.zlp_txg; spa->spa_extreme_rewind = B_TRUE; zfs_dbgmsg("spa_tryimport: importing %s, max_txg=%lld", poolname, (longlong_t)policy.zlp_txg); } else { zfs_dbgmsg("spa_tryimport: importing %s", poolname); } if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_CACHEFILE, &cachefile) == 0) { zfs_dbgmsg("spa_tryimport: using cachefile '%s'", cachefile); spa->spa_config_source = SPA_CONFIG_SRC_CACHEFILE; } else { spa->spa_config_source = SPA_CONFIG_SRC_SCAN; } error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING); /* * If 'tryconfig' was at least parsable, return the current config. */ if (spa->spa_root_vdev != NULL) { config = spa_config_generate(spa, NULL, -1ULL, B_TRUE); VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, poolname) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, state) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP, spa->spa_uberblock.ub_timestamp) == 0); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); /* * If the bootfs property exists on this pool then we * copy it out so that external consumers can tell which * pools are bootable. */ if ((!error || error == EEXIST) && spa->spa_bootfs) { char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); /* * We have to play games with the name since the * pool was opened as TRYIMPORT_NAME. */ if (dsl_dsobj_to_dsname(spa_name(spa), spa->spa_bootfs, tmpname) == 0) { char *cp; char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); cp = strchr(tmpname, '/'); if (cp == NULL) { (void) strlcpy(dsname, tmpname, MAXPATHLEN); } else { (void) snprintf(dsname, MAXPATHLEN, "%s/%s", poolname, ++cp); } VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_BOOTFS, dsname) == 0); kmem_free(dsname, MAXPATHLEN); } kmem_free(tmpname, MAXPATHLEN); } /* * Add the list of hot spares and level 2 cache devices. */ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); spa_add_spares(spa, config); spa_add_l2cache(spa, config); spa_config_exit(spa, SCL_CONFIG, FTAG); } spa_unload(spa); spa_deactivate(spa); spa_remove(spa); mutex_exit(&spa_namespace_lock); return (config); } /* * Pool export/destroy * * The act of destroying or exporting a pool is very simple. 
We make sure there * is no more pending I/O and any references to the pool are gone. Then, we * update the pool state and sync all the labels to disk, removing the * configuration from the cache afterwards. If the 'hardforce' flag is set, then * we don't sync the labels or remove the configuration cache. */ static int spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, boolean_t force, boolean_t hardforce) { spa_t *spa; if (oldconfig) *oldconfig = NULL; if (!(spa_mode_global & FWRITE)) return (SET_ERROR(EROFS)); mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(pool)) == NULL) { mutex_exit(&spa_namespace_lock); return (SET_ERROR(ENOENT)); } /* * Put a hold on the pool, drop the namespace lock, stop async tasks, * reacquire the namespace lock, and see if we can export. */ spa_open_ref(spa, FTAG); mutex_exit(&spa_namespace_lock); spa_async_suspend(spa); mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); /* * The pool will be in core if it's openable, * in which case we can modify its state. */ if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) { /* * Objsets may be open only because they're dirty, so we * have to force it to sync before checking spa_refcnt. */ txg_wait_synced(spa->spa_dsl_pool, 0); spa_evicting_os_wait(spa); /* * A pool cannot be exported or destroyed if there are active * references. If we are resetting a pool, allow references by * fault injection handlers. */ if (!spa_refcount_zero(spa) || (spa->spa_inject_ref != 0 && new_state != POOL_STATE_UNINITIALIZED)) { spa_async_resume(spa); mutex_exit(&spa_namespace_lock); return (SET_ERROR(EBUSY)); } /* * A pool cannot be exported if it has an active shared spare. * This is to prevent other pools from stealing the active spare * from an exported pool. At the user's explicit request, such a * pool can be forcibly exported. */ if (!force && new_state == POOL_STATE_EXPORTED && spa_has_active_shared_spare(spa)) { spa_async_resume(spa); mutex_exit(&spa_namespace_lock); return (SET_ERROR(EXDEV)); } /* * We're about to export or destroy this pool. Make sure * we stop all initialization activity here before we * set the spa_final_txg. This will ensure that all * dirty data resulting from the initialization is * committed to disk before we unload the pool. */ if (spa->spa_root_vdev != NULL) { vdev_initialize_stop_all(spa->spa_root_vdev, VDEV_INITIALIZE_ACTIVE); } /* * We want this to be reflected on every label, * so mark them all dirty. spa_unload() will do the * final sync that pushes these changes out. */ if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) { spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); spa->spa_state = new_state; spa->spa_final_txg = spa_last_synced_txg(spa) + TXG_DEFER_SIZE + 1; vdev_config_dirty(spa->spa_root_vdev); spa_config_exit(spa, SCL_ALL, FTAG); } } spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_DESTROY); if (spa->spa_state != POOL_STATE_UNINITIALIZED) { spa_unload(spa); spa_deactivate(spa); } if (oldconfig && spa->spa_config) VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0); if (new_state != POOL_STATE_UNINITIALIZED) { if (!hardforce) spa_write_cachefile(spa, B_TRUE, B_TRUE); spa_remove(spa); } mutex_exit(&spa_namespace_lock); return (0); } /* * Destroy a storage pool. */ int spa_destroy(char *pool) { return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL, B_FALSE, B_FALSE)); } /* * Export a storage pool.
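 *
 * Illustrative call (not part of the original source; poolname is a
 * caller-supplied pool name):
 *
 *	nvlist_t *oldconfig = NULL;
 *	error = spa_export(poolname, &oldconfig, B_FALSE, B_FALSE);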
*/ int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, boolean_t hardforce) { return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig, force, hardforce)); } /* * Similar to spa_export(), this unloads the spa_t without actually removing it * from the namespace in any way. */ int spa_reset(char *pool) { return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL, B_FALSE, B_FALSE)); } /* * ========================================================================== * Device manipulation * ========================================================================== */ /* * Add a device to a storage pool. */ int spa_vdev_add(spa_t *spa, nvlist_t *nvroot) { uint64_t txg, id; int error; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd, *tvd; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0, VDEV_ALLOC_ADD)) != 0) return (spa_vdev_exit(spa, NULL, txg, error)); spa->spa_pending_vdev = vd; /* spa_vdev_exit() will clear this */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) != 0) nspares = 0; if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) != 0) nl2cache = 0; if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0) return (spa_vdev_exit(spa, vd, txg, EINVAL)); if (vd->vdev_children != 0 && (error = vdev_create(vd, txg, B_FALSE)) != 0) return (spa_vdev_exit(spa, vd, txg, error)); /* * We must validate the spares and l2cache devices after checking the * children. Otherwise, vdev_inuse() will blindly overwrite the spare. */ if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0) return (spa_vdev_exit(spa, vd, txg, error)); /* * If we are in the middle of a device removal, we can only add * devices which match the existing devices in the pool. * If we are in the middle of a removal, or have some indirect * vdevs, we can not add raidz toplevels. */ if (spa->spa_vdev_removal != NULL || spa->spa_removing_phys.sr_prev_indirect_vdev != -1) { for (int c = 0; c < vd->vdev_children; c++) { tvd = vd->vdev_child[c]; if (spa->spa_vdev_removal != NULL && tvd->vdev_ashift != spa->spa_max_ashift) { return (spa_vdev_exit(spa, vd, txg, EINVAL)); } /* Fail if top level vdev is raidz */ if (tvd->vdev_ops == &vdev_raidz_ops) { return (spa_vdev_exit(spa, vd, txg, EINVAL)); } /* * Need the top level mirror to be * a mirror of leaf vdevs only */ if (tvd->vdev_ops == &vdev_mirror_ops) { for (uint64_t cid = 0; cid < tvd->vdev_children; cid++) { vdev_t *cvd = tvd->vdev_child[cid]; if (!cvd->vdev_ops->vdev_op_leaf) { return (spa_vdev_exit(spa, vd, txg, EINVAL)); } } } } } for (int c = 0; c < vd->vdev_children; c++) { /* * Set the vdev id to the first hole, if one exists. */ for (id = 0; id < rvd->vdev_children; id++) { if (rvd->vdev_child[id]->vdev_ishole) { vdev_free(rvd->vdev_child[id]); break; } } tvd = vd->vdev_child[c]; vdev_remove_child(vd, tvd); tvd->vdev_id = id; vdev_add_child(rvd, tvd); vdev_config_dirty(tvd); } if (nspares != 0) { spa_set_aux_vdevs(&spa->spa_spares, spares, nspares, ZPOOL_CONFIG_SPARES); spa_load_spares(spa); spa->spa_spares.sav_sync = B_TRUE; } if (nl2cache != 0) { spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache, ZPOOL_CONFIG_L2CACHE); spa_load_l2cache(spa); spa->spa_l2cache.sav_sync = B_TRUE; } /* * We have to be careful when adding new vdevs to an existing pool. 
* If other threads start allocating from these vdevs before we * sync the config cache, and we lose power, then upon reboot we may * fail to open the pool because there are DVAs that the config cache * can't translate. Therefore, we first add the vdevs without * initializing metaslabs; sync the config cache (via spa_vdev_exit()); * and then let spa_config_update() initialize the new metaslabs. * * spa_load() checks for added-but-not-initialized vdevs, so that * if we lose power at any point in this sequence, the remaining * steps will be completed the next time we load the pool. */ (void) spa_vdev_exit(spa, vd, txg, 0); mutex_enter(&spa_namespace_lock); spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); spa_event_notify(spa, NULL, NULL, ESC_ZFS_VDEV_ADD); mutex_exit(&spa_namespace_lock); return (0); } /* * Attach a device to a mirror. The arguments are the path to any device * in the mirror, and the nvroot for the new device. If the path specifies * a device that is not mirrored, we automatically insert the mirror vdev. * * If 'replacing' is specified, the new device is intended to replace the * existing device; in this case the two devices are made into their own * mirror using the 'replacing' vdev, which is functionally identical to * the mirror vdev (it actually reuses all the same ops) but has a few * extra rules: you can't attach to it after it's been created, and upon * completion of resilvering, the first disk (the one being replaced) * is automatically detached. */ int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) { uint64_t txg, dtl_max_txg; vdev_t *rvd = spa->spa_root_vdev; vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd; vdev_ops_t *pvops; char *oldvdpath, *newvdpath; int newvd_isspare; int error; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); oldvd = spa_lookup_by_guid(spa, guid, B_FALSE); ASSERT(MUTEX_HELD(&spa_namespace_lock)); if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { error = (spa_has_checkpoint(spa)) ? ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; return (spa_vdev_exit(spa, NULL, txg, error)); } if (spa->spa_vdev_removal != NULL) return (spa_vdev_exit(spa, NULL, txg, EBUSY)); if (oldvd == NULL) return (spa_vdev_exit(spa, NULL, txg, ENODEV)); if (!oldvd->vdev_ops->vdev_op_leaf) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); pvd = oldvd->vdev_parent; if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0, VDEV_ALLOC_ATTACH)) != 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); if (newrootvd->vdev_children != 1) return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); newvd = newrootvd->vdev_child[0]; if (!newvd->vdev_ops->vdev_op_leaf) return (spa_vdev_exit(spa, newrootvd, txg, EINVAL)); if ((error = vdev_create(newrootvd, txg, replacing)) != 0) return (spa_vdev_exit(spa, newrootvd, txg, error)); /* * Spares can't replace logs */ if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); if (!replacing) { /* * For attach, the only allowable parent is a mirror or the root * vdev. */ if (pvd->vdev_ops != &vdev_mirror_ops && pvd->vdev_ops != &vdev_root_ops) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); pvops = &vdev_mirror_ops; } else { /* * Active hot spares can only be replaced by inactive hot * spares. 
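 * (That is, when the device being replaced is an active spare, the
 * replacement must itself be registered as one of this pool's spares;
 * see the spa_has_spare() check below.)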
*/ if (pvd->vdev_ops == &vdev_spare_ops && oldvd->vdev_isspare && !spa_has_spare(spa, newvd->vdev_guid)) return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); /* * If the source is a hot spare, and the parent isn't already a * spare, then we want to create a new hot spare. Otherwise, we * want to create a replacing vdev. The user is not allowed to * attach to a spared vdev child unless the 'isspare' state is * the same (spare replaces spare, non-spare replaces * non-spare). */ if (pvd->vdev_ops == &vdev_replacing_ops && spa_version(spa) < SPA_VERSION_MULTI_REPLACE) { return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); } else if (pvd->vdev_ops == &vdev_spare_ops && newvd->vdev_isspare != oldvd->vdev_isspare) { return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP)); } if (newvd->vdev_isspare) pvops = &vdev_spare_ops; else pvops = &vdev_replacing_ops; } /* * Make sure the new device is big enough. */ if (newvd->vdev_asize < vdev_get_min_asize(oldvd)) return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); /* * The new device cannot have a higher alignment requirement * than the top-level vdev. */ if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); /* * If this is an in-place replacement, update oldvd's path and devid * to make it distinguishable from newvd, and unopenable from now on. */ if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { spa_strfree(oldvd->vdev_path); oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, KM_SLEEP); (void) sprintf(oldvd->vdev_path, "%s/%s", newvd->vdev_path, "old"); if (oldvd->vdev_devid != NULL) { spa_strfree(oldvd->vdev_devid); oldvd->vdev_devid = NULL; } } /* mark the device being resilvered */ newvd->vdev_resilver_txg = txg; /* * If the parent is not a mirror, or if we're replacing, insert the new * mirror/replacing/spare vdev above oldvd. */ if (pvd->vdev_ops != pvops) pvd = vdev_add_parent(oldvd, pvops); ASSERT(pvd->vdev_top->vdev_parent == rvd); ASSERT(pvd->vdev_ops == pvops); ASSERT(oldvd->vdev_parent == pvd); /* * Extract the new device from its root and add it to pvd. */ vdev_remove_child(newrootvd, newvd); newvd->vdev_id = pvd->vdev_children; newvd->vdev_crtxg = oldvd->vdev_crtxg; vdev_add_child(pvd, newvd); tvd = newvd->vdev_top; ASSERT(pvd->vdev_top == tvd); ASSERT(tvd->vdev_parent == rvd); vdev_config_dirty(tvd); /* * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account * for any dmu_sync-ed blocks. It will propagate upward when * spa_vdev_exit() calls vdev_dtl_reassess(). */ dtl_max_txg = txg + TXG_CONCURRENT_STATES; vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL, dtl_max_txg - TXG_INITIAL); if (newvd->vdev_isspare) { spa_spare_activate(newvd); spa_event_notify(spa, newvd, NULL, ESC_ZFS_VDEV_SPARE); } oldvdpath = spa_strdup(oldvd->vdev_path); newvdpath = spa_strdup(newvd->vdev_path); newvd_isspare = newvd->vdev_isspare; /* * Mark newvd's DTL dirty in this txg. */ vdev_dirty(tvd, VDD_DTL, newvd, txg); /* * Schedule the resilver to restart in the future. We do this to * ensure that dmu_sync-ed blocks have been stitched into the * respective datasets. */ dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg); if (spa->spa_bootfs) spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH); spa_event_notify(spa, newvd, NULL, ESC_ZFS_VDEV_ATTACH); /* * Commit the config */ (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0); spa_history_log_internal(spa, "vdev attach", NULL, "%s vdev=%s %s vdev=%s", replacing && newvd_isspare ? "spare in" : replacing ? 
"replace" : "attach", newvdpath, replacing ? "for" : "to", oldvdpath); spa_strfree(oldvdpath); spa_strfree(newvdpath); return (0); } /* * Detach a device from a mirror or replacing vdev. * * If 'replace_done' is specified, only detach if the parent * is a replacing vdev. */ int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) { uint64_t txg; int error; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd, *pvd, *cvd, *tvd; boolean_t unspare = B_FALSE; uint64_t unspare_guid = 0; char *vdpath; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); vd = spa_lookup_by_guid(spa, guid, B_FALSE); /* * Besides being called directly from the userland through the * ioctl interface, spa_vdev_detach() can be potentially called * at the end of spa_vdev_resilver_done(). * * In the regular case, when we have a checkpoint this shouldn't * happen as we never empty the DTLs of a vdev during the scrub * [see comment in dsl_scan_done()]. Thus spa_vdev_resilvering_done() * should never get here when we have a checkpoint. * * That said, even in a case when we checkpoint the pool exactly * as spa_vdev_resilver_done() calls this function everything * should be fine as the resilver will return right away. */ ASSERT(MUTEX_HELD(&spa_namespace_lock)); if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { error = (spa_has_checkpoint(spa)) ? ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; return (spa_vdev_exit(spa, NULL, txg, error)); } if (vd == NULL) return (spa_vdev_exit(spa, NULL, txg, ENODEV)); if (!vd->vdev_ops->vdev_op_leaf) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); pvd = vd->vdev_parent; /* * If the parent/child relationship is not as expected, don't do it. * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing * vdev that's replacing B with C. The user's intent in replacing * is to go from M(A,B) to M(A,C). If the user decides to cancel * the replace by detaching C, the expected behavior is to end up * M(A,B). But suppose that right after deciding to detach C, * the replacement of B completes. We would have M(A,C), and then * ask to detach C, which would leave us with just A -- not what * the user wanted. To prevent this, we make sure that the * parent/child relationship hasn't changed -- in this example, * that C's parent is still the replacing vdev R. */ if (pvd->vdev_guid != pguid && pguid != 0) return (spa_vdev_exit(spa, NULL, txg, EBUSY)); /* * Only 'replacing' or 'spare' vdevs can be replaced. */ if (replace_done && pvd->vdev_ops != &vdev_replacing_ops && pvd->vdev_ops != &vdev_spare_ops) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); ASSERT(pvd->vdev_ops != &vdev_spare_ops || spa_version(spa) >= SPA_VERSION_SPARES); /* * Only mirror, replacing, and spare vdevs support detach. */ if (pvd->vdev_ops != &vdev_replacing_ops && pvd->vdev_ops != &vdev_mirror_ops && pvd->vdev_ops != &vdev_spare_ops) return (spa_vdev_exit(spa, NULL, txg, ENOTSUP)); /* * If this device has the only valid copy of some data, * we cannot safely detach it. */ if (vdev_dtl_required(vd)) return (spa_vdev_exit(spa, NULL, txg, EBUSY)); ASSERT(pvd->vdev_children >= 2); /* * If we are detaching the second disk from a replacing vdev, then * check to see if we changed the original vdev's path to have "/old" * at the end in spa_vdev_attach(). If so, undo that change now. 
*/ if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 && vd->vdev_path != NULL) { size_t len = strlen(vd->vdev_path); for (int c = 0; c < pvd->vdev_children; c++) { cvd = pvd->vdev_child[c]; if (cvd == vd || cvd->vdev_path == NULL) continue; if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 && strcmp(cvd->vdev_path + len, "/old") == 0) { spa_strfree(cvd->vdev_path); cvd->vdev_path = spa_strdup(vd->vdev_path); break; } } } /* * If we are detaching the original disk from a spare, then it implies * that the spare should become a real disk, and be removed from the * active spare list for the pool. */ if (pvd->vdev_ops == &vdev_spare_ops && vd->vdev_id == 0 && pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare) unspare = B_TRUE; /* * Erase the disk labels so the disk can be used for other things. * This must be done after all other error cases are handled, * but before we disembowel vd (so we can still do I/O to it). * But if we can't do it, don't treat the error as fatal -- * it may be that the unwritability of the disk is the reason * it's being detached! */ error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE); /* * Remove vd from its parent and compact the parent's children. */ vdev_remove_child(pvd, vd); vdev_compact_children(pvd); /* * Remember one of the remaining children so we can get tvd below. */ cvd = pvd->vdev_child[pvd->vdev_children - 1]; /* * If we need to remove the remaining child from the list of hot spares, * do it now, marking the vdev as no longer a spare in the process. * We must do this before vdev_remove_parent(), because that can * change the GUID if it creates a new toplevel GUID. For a similar * reason, we must remove the spare now, in the same txg as the detach; * otherwise someone could attach a new sibling, change the GUID, and * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail. */ if (unspare) { ASSERT(cvd->vdev_isspare); spa_spare_remove(cvd); unspare_guid = cvd->vdev_guid; (void) spa_vdev_remove(spa, unspare_guid, B_TRUE); cvd->vdev_unspare = B_TRUE; } /* * If the parent mirror/replacing vdev only has one child, * the parent is no longer needed. Remove it from the tree. */ if (pvd->vdev_children == 1) { if (pvd->vdev_ops == &vdev_spare_ops) cvd->vdev_unspare = B_FALSE; vdev_remove_parent(cvd); } /* * We don't set tvd until now because the parent we just removed * may have been the previous top-level vdev. */ tvd = cvd->vdev_top; ASSERT(tvd->vdev_parent == rvd); /* * Reevaluate the parent vdev state. */ vdev_propagate_state(cvd); /* * If the 'autoexpand' property is set on the pool then automatically * try to expand the size of the pool. For example if the device we * just detached was smaller than the others, it may be possible to * add metaslabs (i.e. grow the pool). We need to reopen the vdev * first so that we can obtain the updated sizes of the leaf vdevs. */ if (spa->spa_autoexpand) { vdev_reopen(tvd); vdev_expand(tvd, txg); } vdev_config_dirty(tvd); /* * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that * vd->vdev_detached is set and free vd's DTL object in syncing context. * But first make sure we're not on any *other* txg's DTL list, to * prevent vd from being accessed after it's freed. 
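 * (The TXG_SIZE loop below walks every txg slot's DTL list and drops
 * vd from each.)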
	 */
	vdpath = spa_strdup(vd->vdev_path);
	for (int t = 0; t < TXG_SIZE; t++)
		(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
	vd->vdev_detached = B_TRUE;
	vdev_dirty(tvd, VDD_DTL, vd, txg);

	spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE);

	/* hang on to the spa before we release the lock */
	spa_open_ref(spa, FTAG);

	error = spa_vdev_exit(spa, vd, txg, 0);

	spa_history_log_internal(spa, "detach", NULL,
	    "vdev=%s", vdpath);
	spa_strfree(vdpath);

	/*
	 * If this was the removal of the original device in a hot spare vdev,
	 * then we want to go through and remove the device from the hot spare
	 * list of every other pool.
	 */
	if (unspare) {
		spa_t *altspa = NULL;

		mutex_enter(&spa_namespace_lock);
		while ((altspa = spa_next(altspa)) != NULL) {
			if (altspa->spa_state != POOL_STATE_ACTIVE ||
			    altspa == spa)
				continue;

			spa_open_ref(altspa, FTAG);
			mutex_exit(&spa_namespace_lock);
			(void) spa_vdev_remove(altspa, unspare_guid, B_TRUE);
			mutex_enter(&spa_namespace_lock);
			spa_close(altspa, FTAG);
		}
		mutex_exit(&spa_namespace_lock);

		/* search the rest of the vdevs for spares to remove */
		spa_vdev_resilver_done(spa);
	}

	/* all done with the spa; OK to release */
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);
	mutex_exit(&spa_namespace_lock);

	return (error);
}

int
spa_vdev_initialize(spa_t *spa, uint64_t guid, uint64_t cmd_type)
{
	/*
	 * We hold the namespace lock through the whole function
	 * to prevent any changes to the pool while we're starting or
	 * stopping initialization. The config and state locks are held so that
	 * we can properly assess the vdev state before we commit to
	 * the initializing operation.
	 */
	mutex_enter(&spa_namespace_lock);
	spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);

	/* Look up vdev and ensure it's a leaf. */
	vdev_t *vd = spa_lookup_by_guid(spa, guid, B_FALSE);
	if (vd == NULL || vd->vdev_detached) {
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		mutex_exit(&spa_namespace_lock);
		return (SET_ERROR(ENODEV));
	} else if (!vd->vdev_ops->vdev_op_leaf || !vdev_is_concrete(vd)) {
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		mutex_exit(&spa_namespace_lock);
		return (SET_ERROR(EINVAL));
	} else if (!vdev_writeable(vd)) {
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		mutex_exit(&spa_namespace_lock);
		return (SET_ERROR(EROFS));
	}
	mutex_enter(&vd->vdev_initialize_lock);
	spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);

	/*
	 * When we activate an initialize action we check to see
	 * if the vdev_initialize_thread is NULL. We do this instead
	 * of using the vdev_initialize_state since there might be
	 * a previous initialization process which has completed but
	 * the thread has not exited yet.
*/ if (cmd_type == POOL_INITIALIZE_DO && (vd->vdev_initialize_thread != NULL || vd->vdev_top->vdev_removing)) { mutex_exit(&vd->vdev_initialize_lock); mutex_exit(&spa_namespace_lock); return (SET_ERROR(EBUSY)); } else if (cmd_type == POOL_INITIALIZE_CANCEL && (vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE && vd->vdev_initialize_state != VDEV_INITIALIZE_SUSPENDED)) { mutex_exit(&vd->vdev_initialize_lock); mutex_exit(&spa_namespace_lock); return (SET_ERROR(ESRCH)); } else if (cmd_type == POOL_INITIALIZE_SUSPEND && vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE) { mutex_exit(&vd->vdev_initialize_lock); mutex_exit(&spa_namespace_lock); return (SET_ERROR(ESRCH)); } switch (cmd_type) { case POOL_INITIALIZE_DO: vdev_initialize(vd); break; case POOL_INITIALIZE_CANCEL: vdev_initialize_stop(vd, VDEV_INITIALIZE_CANCELED); break; case POOL_INITIALIZE_SUSPEND: vdev_initialize_stop(vd, VDEV_INITIALIZE_SUSPENDED); break; default: panic("invalid cmd_type %llu", (unsigned long long)cmd_type); } mutex_exit(&vd->vdev_initialize_lock); /* Sync out the initializing state */ txg_wait_synced(spa->spa_dsl_pool, 0); mutex_exit(&spa_namespace_lock); return (0); } /* * Split a set of devices from their mirrors, and create a new pool from them. */ int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, nvlist_t *props, boolean_t exp) { int error = 0; uint64_t txg, *glist; spa_t *newspa; uint_t c, children, lastlog; nvlist_t **child, *nvl, *tmp; dmu_tx_t *tx; char *altroot = NULL; vdev_t *rvd, **vml = NULL; /* vdev modify list */ boolean_t activate_slog; ASSERT(spa_writeable(spa)); txg = spa_vdev_enter(spa); ASSERT(MUTEX_HELD(&spa_namespace_lock)); if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT)) { error = (spa_has_checkpoint(spa)) ? ZFS_ERR_CHECKPOINT_EXISTS : ZFS_ERR_DISCARDING_CHECKPOINT; return (spa_vdev_exit(spa, NULL, txg, error)); } /* clear the log and flush everything up to now */ activate_slog = spa_passivate_log(spa); (void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); error = spa_reset_logs(spa); txg = spa_vdev_config_enter(spa); if (activate_slog) spa_activate_log(spa); if (error != 0) return (spa_vdev_exit(spa, NULL, txg, error)); /* check new spa name before going any further */ if (spa_lookup(newname) != NULL) return (spa_vdev_exit(spa, NULL, txg, EEXIST)); /* * scan through all the children to ensure they're all mirrors */ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 || nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); /* first, check to ensure we've got the right child count */ rvd = spa->spa_root_vdev; lastlog = 0; for (c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; /* don't count the holes & logs as children */ if (vd->vdev_islog || !vdev_is_concrete(vd)) { if (lastlog == 0) lastlog = c; continue; } lastlog = 0; } if (children != (lastlog != 0 ? 
lastlog : rvd->vdev_children)) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); /* next, ensure no spare or cache devices are part of the split */ if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 || nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP); glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP); /* then, loop over each vdev and validate it */ for (c = 0; c < children; c++) { uint64_t is_hole = 0; (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, &is_hole); if (is_hole != 0) { if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole || spa->spa_root_vdev->vdev_child[c]->vdev_islog) { continue; } else { error = SET_ERROR(EINVAL); break; } } /* which disk is going to be split? */ if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID, &glist[c]) != 0) { error = SET_ERROR(EINVAL); break; } /* look it up in the spa */ vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE); if (vml[c] == NULL) { error = SET_ERROR(ENODEV); break; } /* make sure there's nothing stopping the split */ if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops || vml[c]->vdev_islog || !vdev_is_concrete(vml[c]) || vml[c]->vdev_isspare || vml[c]->vdev_isl2cache || !vdev_writeable(vml[c]) || vml[c]->vdev_children != 0 || vml[c]->vdev_state != VDEV_STATE_HEALTHY || c != spa->spa_root_vdev->vdev_child[c]->vdev_id) { error = SET_ERROR(EINVAL); break; } if (vdev_dtl_required(vml[c])) { error = SET_ERROR(EBUSY); break; } /* we need certain info from the top level */ VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY, vml[c]->vdev_top->vdev_ms_array) == 0); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT, vml[c]->vdev_top->vdev_ms_shift) == 0); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE, vml[c]->vdev_top->vdev_asize) == 0); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT, vml[c]->vdev_top->vdev_ashift) == 0); /* transfer per-vdev ZAPs */ ASSERT3U(vml[c]->vdev_leaf_zap, !=, 0); VERIFY0(nvlist_add_uint64(child[c], ZPOOL_CONFIG_VDEV_LEAF_ZAP, vml[c]->vdev_leaf_zap)); ASSERT3U(vml[c]->vdev_top->vdev_top_zap, !=, 0); VERIFY0(nvlist_add_uint64(child[c], ZPOOL_CONFIG_VDEV_TOP_ZAP, vml[c]->vdev_parent->vdev_top_zap)); } if (error != 0) { kmem_free(vml, children * sizeof (vdev_t *)); kmem_free(glist, children * sizeof (uint64_t)); return (spa_vdev_exit(spa, NULL, txg, error)); } /* stop writers from using the disks */ for (c = 0; c < children; c++) { if (vml[c] != NULL) vml[c]->vdev_offline = B_TRUE; } vdev_reopen(spa->spa_root_vdev); /* * Temporarily record the splitting vdevs in the spa config. This * will disappear once the config is regenerated. */ VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, glist, children) == 0); kmem_free(glist, children * sizeof (uint64_t)); mutex_enter(&spa->spa_props_lock); VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT, nvl) == 0); mutex_exit(&spa->spa_props_lock); spa->spa_config_splitting = nvl; vdev_config_dirty(spa->spa_root_vdev); /* configure and create the new pool */ VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, exp ? 
POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, spa->spa_config_txg) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_generate_guid(NULL)) == 0); VERIFY0(nvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)); (void) nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot); /* add the new pool to the namespace */ newspa = spa_add(newname, config, altroot); newspa->spa_avz_action = AVZ_ACTION_REBUILD; newspa->spa_config_txg = spa->spa_config_txg; spa_set_log_state(newspa, SPA_LOG_CLEAR); /* release the spa config lock, retaining the namespace lock */ spa_vdev_config_exit(spa, NULL, txg, 0, FTAG); if (zio_injection_enabled) zio_handle_panic_injection(spa, FTAG, 1); spa_activate(newspa, spa_mode_global); spa_async_suspend(newspa); for (c = 0; c < children; c++) { if (vml[c] != NULL) { /* * Temporarily stop the initializing activity. We set * the state to ACTIVE so that we know to resume * the initializing once the split has completed. */ mutex_enter(&vml[c]->vdev_initialize_lock); vdev_initialize_stop(vml[c], VDEV_INITIALIZE_ACTIVE); mutex_exit(&vml[c]->vdev_initialize_lock); } } #ifndef illumos /* mark that we are creating new spa by splitting */ newspa->spa_splitting_newspa = B_TRUE; #endif newspa->spa_config_source = SPA_CONFIG_SRC_SPLIT; /* create the new pool from the disks of the original pool */ error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE); #ifndef illumos newspa->spa_splitting_newspa = B_FALSE; #endif if (error) goto out; /* if that worked, generate a real config for the new pool */ if (newspa->spa_root_vdev != NULL) { VERIFY(nvlist_alloc(&newspa->spa_config_splitting, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(newspa->spa_config_splitting, ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0); spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL, B_TRUE)); } /* set the props */ if (props != NULL) { spa_configfile_set(newspa, props, B_FALSE); error = spa_prop_set(newspa, props); if (error) goto out; } /* flush everything */ txg = spa_vdev_config_enter(newspa); vdev_config_dirty(newspa->spa_root_vdev); (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG); if (zio_injection_enabled) zio_handle_panic_injection(spa, FTAG, 2); spa_async_resume(newspa); /* finally, update the original pool's config */ txg = spa_vdev_config_enter(spa); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); error = dmu_tx_assign(tx, TXG_WAIT); if (error != 0) dmu_tx_abort(tx); for (c = 0; c < children; c++) { if (vml[c] != NULL) { vdev_split(vml[c]); if (error == 0) spa_history_log_internal(spa, "detach", tx, "vdev=%s", vml[c]->vdev_path); vdev_free(vml[c]); } } spa->spa_avz_action = AVZ_ACTION_REBUILD; vdev_config_dirty(spa->spa_root_vdev); spa->spa_config_splitting = NULL; nvlist_free(nvl); if (error == 0) dmu_tx_commit(tx); (void) spa_vdev_exit(spa, NULL, txg, 0); if (zio_injection_enabled) zio_handle_panic_injection(spa, FTAG, 3); /* split is complete; log a history record */ spa_history_log_internal(newspa, "split", NULL, "from pool %s", spa_name(spa)); kmem_free(vml, children * sizeof (vdev_t *)); /* if we're not going to mount the filesystems in userland, export */ if (exp) error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL, B_FALSE, B_FALSE); return (error); out: spa_unload(newspa); spa_deactivate(newspa); spa_remove(newspa); txg = spa_vdev_config_enter(spa); /* re-online all 
offlined disks */ for (c = 0; c < children; c++) { if (vml[c] != NULL) vml[c]->vdev_offline = B_FALSE; } /* restart initializing disks as necessary */ spa_async_request(spa, SPA_ASYNC_INITIALIZE_RESTART); vdev_reopen(spa->spa_root_vdev); nvlist_free(spa->spa_config_splitting); spa->spa_config_splitting = NULL; (void) spa_vdev_exit(spa, NULL, txg, error); kmem_free(vml, children * sizeof (vdev_t *)); return (error); } /* * Find any device that's done replacing, or a vdev marked 'unspare' that's * currently spared, so we can detach it. */ static vdev_t * spa_vdev_resilver_done_hunt(vdev_t *vd) { vdev_t *newvd, *oldvd; for (int c = 0; c < vd->vdev_children; c++) { oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]); if (oldvd != NULL) return (oldvd); } /* * Check for a completed replacement. We always consider the first * vdev in the list to be the oldest vdev, and the last one to be * the newest (see spa_vdev_attach() for how that works). In * the case where the newest vdev is faulted, we will not automatically * remove it after a resilver completes. This is OK as it will require * user intervention to determine which disk the admin wishes to keep. */ if (vd->vdev_ops == &vdev_replacing_ops) { ASSERT(vd->vdev_children > 1); newvd = vd->vdev_child[vd->vdev_children - 1]; oldvd = vd->vdev_child[0]; if (vdev_dtl_empty(newvd, DTL_MISSING) && vdev_dtl_empty(newvd, DTL_OUTAGE) && !vdev_dtl_required(oldvd)) return (oldvd); } /* * Check for a completed resilver with the 'unspare' flag set. */ if (vd->vdev_ops == &vdev_spare_ops) { vdev_t *first = vd->vdev_child[0]; vdev_t *last = vd->vdev_child[vd->vdev_children - 1]; if (last->vdev_unspare) { oldvd = first; newvd = last; } else if (first->vdev_unspare) { oldvd = last; newvd = first; } else { oldvd = NULL; } if (oldvd != NULL && vdev_dtl_empty(newvd, DTL_MISSING) && vdev_dtl_empty(newvd, DTL_OUTAGE) && !vdev_dtl_required(oldvd)) return (oldvd); /* * If there are more than two spares attached to a disk, * and those spares are not required, then we want to * attempt to free them up now so that they can be used * by other pools. Once we're back down to a single * disk+spare, we stop removing them. */ if (vd->vdev_children > 2) { newvd = vd->vdev_child[1]; if (newvd->vdev_isspare && last->vdev_isspare && vdev_dtl_empty(last, DTL_MISSING) && vdev_dtl_empty(last, DTL_OUTAGE) && !vdev_dtl_required(newvd)) return (newvd); } } return (NULL); } static void spa_vdev_resilver_done(spa_t *spa) { vdev_t *vd, *pvd, *ppvd; uint64_t guid, sguid, pguid, ppguid; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) { pvd = vd->vdev_parent; ppvd = pvd->vdev_parent; guid = vd->vdev_guid; pguid = pvd->vdev_guid; ppguid = ppvd->vdev_guid; sguid = 0; /* * If we have just finished replacing a hot spared device, then * we need to detach the parent's first child (the original hot * spare) as well. */ if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 && ppvd->vdev_children == 2) { ASSERT(pvd->vdev_ops == &vdev_replacing_ops); sguid = ppvd->vdev_child[1]->vdev_guid; } ASSERT(vd->vdev_resilver_txg == 0 || !vdev_dtl_required(vd)); spa_config_exit(spa, SCL_ALL, FTAG); if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0) return; if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0) return; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); } spa_config_exit(spa, SCL_ALL, FTAG); } /* * Update the stored path or FRU for this vdev. 
*/ int spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value, boolean_t ispath) { vdev_t *vd; boolean_t sync = B_FALSE; ASSERT(spa_writeable(spa)); spa_vdev_state_enter(spa, SCL_ALL); if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL) return (spa_vdev_state_exit(spa, NULL, ENOENT)); if (!vd->vdev_ops->vdev_op_leaf) return (spa_vdev_state_exit(spa, NULL, ENOTSUP)); if (ispath) { if (strcmp(value, vd->vdev_path) != 0) { spa_strfree(vd->vdev_path); vd->vdev_path = spa_strdup(value); sync = B_TRUE; } } else { if (vd->vdev_fru == NULL) { vd->vdev_fru = spa_strdup(value); sync = B_TRUE; } else if (strcmp(value, vd->vdev_fru) != 0) { spa_strfree(vd->vdev_fru); vd->vdev_fru = spa_strdup(value); sync = B_TRUE; } } return (spa_vdev_state_exit(spa, sync ? vd : NULL, 0)); } int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath) { return (spa_vdev_set_common(spa, guid, newpath, B_TRUE)); } int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru) { return (spa_vdev_set_common(spa, guid, newfru, B_FALSE)); } /* * ========================================================================== * SPA Scanning * ========================================================================== */ int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); if (dsl_scan_resilvering(spa->spa_dsl_pool)) return (SET_ERROR(EBUSY)); return (dsl_scrub_set_pause_resume(spa->spa_dsl_pool, cmd)); } int spa_scan_stop(spa_t *spa) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); if (dsl_scan_resilvering(spa->spa_dsl_pool)) return (SET_ERROR(EBUSY)); return (dsl_scan_cancel(spa->spa_dsl_pool)); } int spa_scan(spa_t *spa, pool_scan_func_t func) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE) return (SET_ERROR(ENOTSUP)); /* * If a resilver was requested, but there is no DTL on a * writeable leaf device, we have nothing to do. */ if (func == POOL_SCAN_RESILVER && !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) { spa_async_request(spa, SPA_ASYNC_RESILVER_DONE); return (0); } return (dsl_scan(spa->spa_dsl_pool, func)); } /* * ========================================================================== * SPA async task processing * ========================================================================== */ static void spa_async_remove(spa_t *spa, vdev_t *vd) { if (vd->vdev_remove_wanted) { vd->vdev_remove_wanted = B_FALSE; vd->vdev_delayed_close = B_FALSE; vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE); /* * We want to clear the stats, but we don't want to do a full * vdev_clear() as that will cause us to throw away * degraded/faulted state as well as attempt to reopen the * device, all of which is a waste. */ vd->vdev_stat.vs_read_errors = 0; vd->vdev_stat.vs_write_errors = 0; vd->vdev_stat.vs_checksum_errors = 0; vdev_state_dirty(vd->vdev_top); /* Tell userspace that the vdev is gone. 
*/ zfs_post_remove(spa, vd); } for (int c = 0; c < vd->vdev_children; c++) spa_async_remove(spa, vd->vdev_child[c]); } static void spa_async_probe(spa_t *spa, vdev_t *vd) { if (vd->vdev_probe_wanted) { vd->vdev_probe_wanted = B_FALSE; vdev_reopen(vd); /* vdev_open() does the actual probe */ } for (int c = 0; c < vd->vdev_children; c++) spa_async_probe(spa, vd->vdev_child[c]); } static void spa_async_autoexpand(spa_t *spa, vdev_t *vd) { sysevent_id_t eid; nvlist_t *attr; char *physpath; if (!spa->spa_autoexpand) return; for (int c = 0; c < vd->vdev_children; c++) { vdev_t *cvd = vd->vdev_child[c]; spa_async_autoexpand(spa, cvd); } if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) return; physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, ESC_ZFS_VDEV_AUTOEXPAND, attr, &eid, DDI_SLEEP); nvlist_free(attr); kmem_free(physpath, MAXPATHLEN); } static void spa_async_thread(void *arg) { spa_t *spa = (spa_t *)arg; int tasks; ASSERT(spa->spa_sync_on); mutex_enter(&spa->spa_async_lock); tasks = spa->spa_async_tasks; spa->spa_async_tasks &= SPA_ASYNC_REMOVE; mutex_exit(&spa->spa_async_lock); /* * See if the config needs to be updated. */ if (tasks & SPA_ASYNC_CONFIG_UPDATE) { uint64_t old_space, new_space; mutex_enter(&spa_namespace_lock); old_space = metaslab_class_get_space(spa_normal_class(spa)); spa_config_update(spa, SPA_CONFIG_UPDATE_POOL); new_space = metaslab_class_get_space(spa_normal_class(spa)); mutex_exit(&spa_namespace_lock); /* * If the pool grew as a result of the config update, * then log an internal history event. */ if (new_space != old_space) { spa_history_log_internal(spa, "vdev online", NULL, "pool '%s' size: %llu(+%llu)", spa_name(spa), new_space, new_space - old_space); } } if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) { spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); spa_async_autoexpand(spa, spa->spa_root_vdev); spa_config_exit(spa, SCL_CONFIG, FTAG); } /* * See if any devices need to be probed. */ if (tasks & SPA_ASYNC_PROBE) { spa_vdev_state_enter(spa, SCL_NONE); spa_async_probe(spa, spa->spa_root_vdev); (void) spa_vdev_state_exit(spa, NULL, 0); } /* * If any devices are done replacing, detach them. */ if (tasks & SPA_ASYNC_RESILVER_DONE) spa_vdev_resilver_done(spa); /* * Kick off a resilver. */ if (tasks & SPA_ASYNC_RESILVER) dsl_resilver_restart(spa->spa_dsl_pool, 0); if (tasks & SPA_ASYNC_INITIALIZE_RESTART) { mutex_enter(&spa_namespace_lock); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); vdev_initialize_restart(spa->spa_root_vdev); spa_config_exit(spa, SCL_CONFIG, FTAG); mutex_exit(&spa_namespace_lock); } /* * Let the world know that we're done. */ mutex_enter(&spa->spa_async_lock); spa->spa_async_thread = NULL; cv_broadcast(&spa->spa_async_cv); mutex_exit(&spa->spa_async_lock); thread_exit(); } static void spa_async_thread_vd(void *arg) { spa_t *spa = arg; int tasks; mutex_enter(&spa->spa_async_lock); tasks = spa->spa_async_tasks; retry: spa->spa_async_tasks &= ~SPA_ASYNC_REMOVE; mutex_exit(&spa->spa_async_lock); /* * See if any devices need to be marked REMOVED. 
*/ if (tasks & SPA_ASYNC_REMOVE) { spa_vdev_state_enter(spa, SCL_NONE); spa_async_remove(spa, spa->spa_root_vdev); for (int i = 0; i < spa->spa_l2cache.sav_count; i++) spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]); for (int i = 0; i < spa->spa_spares.sav_count; i++) spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]); (void) spa_vdev_state_exit(spa, NULL, 0); } /* * Let the world know that we're done. */ mutex_enter(&spa->spa_async_lock); tasks = spa->spa_async_tasks; if ((tasks & SPA_ASYNC_REMOVE) != 0) goto retry; spa->spa_async_thread_vd = NULL; cv_broadcast(&spa->spa_async_cv); mutex_exit(&spa->spa_async_lock); thread_exit(); } void spa_async_suspend(spa_t *spa) { mutex_enter(&spa->spa_async_lock); spa->spa_async_suspended++; while (spa->spa_async_thread != NULL || spa->spa_async_thread_vd != NULL) cv_wait(&spa->spa_async_cv, &spa->spa_async_lock); mutex_exit(&spa->spa_async_lock); spa_vdev_remove_suspend(spa); zthr_t *condense_thread = spa->spa_condense_zthr; if (condense_thread != NULL && zthr_isrunning(condense_thread)) VERIFY0(zthr_cancel(condense_thread)); zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr; if (discard_thread != NULL && zthr_isrunning(discard_thread)) VERIFY0(zthr_cancel(discard_thread)); } void spa_async_resume(spa_t *spa) { mutex_enter(&spa->spa_async_lock); ASSERT(spa->spa_async_suspended != 0); spa->spa_async_suspended--; mutex_exit(&spa->spa_async_lock); spa_restart_removal(spa); zthr_t *condense_thread = spa->spa_condense_zthr; if (condense_thread != NULL && !zthr_isrunning(condense_thread)) zthr_resume(condense_thread); zthr_t *discard_thread = spa->spa_checkpoint_discard_zthr; if (discard_thread != NULL && !zthr_isrunning(discard_thread)) zthr_resume(discard_thread); } static boolean_t spa_async_tasks_pending(spa_t *spa) { uint_t non_config_tasks; uint_t config_task; boolean_t config_task_suspended; non_config_tasks = spa->spa_async_tasks & ~(SPA_ASYNC_CONFIG_UPDATE | SPA_ASYNC_REMOVE); config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE; if (spa->spa_ccw_fail_time == 0) { config_task_suspended = B_FALSE; } else { config_task_suspended = (gethrtime() - spa->spa_ccw_fail_time) < (zfs_ccw_retry_interval * NANOSEC); } return (non_config_tasks || (config_task && !config_task_suspended)); } static void spa_async_dispatch(spa_t *spa) { mutex_enter(&spa->spa_async_lock); if (spa_async_tasks_pending(spa) && !spa->spa_async_suspended && spa->spa_async_thread == NULL && rootdir != NULL) spa->spa_async_thread = thread_create(NULL, 0, spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri); mutex_exit(&spa->spa_async_lock); } static void spa_async_dispatch_vd(spa_t *spa) { mutex_enter(&spa->spa_async_lock); if ((spa->spa_async_tasks & SPA_ASYNC_REMOVE) != 0 && !spa->spa_async_suspended && spa->spa_async_thread_vd == NULL && rootdir != NULL) spa->spa_async_thread_vd = thread_create(NULL, 0, spa_async_thread_vd, spa, 0, &p0, TS_RUN, maxclsyspri); mutex_exit(&spa->spa_async_lock); } void spa_async_request(spa_t *spa, int task) { zfs_dbgmsg("spa=%s async request task=%u", spa->spa_name, task); mutex_enter(&spa->spa_async_lock); spa->spa_async_tasks |= task; mutex_exit(&spa->spa_async_lock); spa_async_dispatch_vd(spa); } /* * ========================================================================== * SPA syncing routines * ========================================================================== */ static int bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { bpobj_t *bpo = arg; bpobj_enqueue(bpo, bp, tx); return (0); } static int 
spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { zio_t *zio = arg; zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, BP_GET_PSIZE(bp), zio->io_flags)); return (0); } /* * Note: this simple function is not inlined to make it easier to dtrace the * amount of time spent syncing frees. */ static void spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx) { zio_t *zio = zio_root(spa, NULL, NULL, 0); bplist_iterate(bpl, spa_free_sync_cb, zio, tx); VERIFY(zio_wait(zio) == 0); } /* * Note: this simple function is not inlined to make it easier to dtrace the * amount of time spent syncing deferred frees. */ static void spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx) { zio_t *zio = zio_root(spa, NULL, NULL, 0); VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj, spa_free_sync_cb, zio, tx), ==, 0); VERIFY0(zio_wait(zio)); } static void spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) { char *packed = NULL; size_t bufsize; size_t nvsize = 0; dmu_buf_t *db; VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0); /* * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration * information. This avoids the dmu_buf_will_dirty() path and * saves us a pre-read to get data we don't actually care about. */ bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE); packed = kmem_alloc(bufsize, KM_SLEEP); VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP) == 0); bzero(packed + nvsize, bufsize - nvsize); dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); kmem_free(packed, bufsize); VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db)); dmu_buf_will_dirty(db, tx); *(uint64_t *)db->db_data = nvsize; dmu_buf_rele(db, FTAG); } static void spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, const char *config, const char *entry) { nvlist_t *nvroot; nvlist_t **list; int i; if (!sav->sav_sync) return; /* * Update the MOS nvlist describing the list of available devices. * spa_validate_aux() will have already made sure this nvlist is * valid and the vdevs are labeled appropriately. */ if (sav->sav_object == 0) { sav->sav_object = dmu_object_alloc(spa->spa_meta_objset, DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx); VERIFY(zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1, &sav->sav_object, tx) == 0); } VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (sav->sav_count == 0) { VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); } else { list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); for (i = 0; i < sav->sav_count; i++) list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], B_FALSE, VDEV_CONFIG_L2CACHE); VERIFY(nvlist_add_nvlist_array(nvroot, config, list, sav->sav_count) == 0); for (i = 0; i < sav->sav_count; i++) nvlist_free(list[i]); kmem_free(list, sav->sav_count * sizeof (void *)); } spa_sync_nvlist(spa, sav->sav_object, nvroot, tx); nvlist_free(nvroot); sav->sav_sync = B_FALSE; } /* * Rebuild spa's all-vdev ZAP from the vdev ZAPs indicated in each vdev_t. * The all-vdev ZAP must be empty. 
*/ static void spa_avz_build(vdev_t *vd, uint64_t avz, dmu_tx_t *tx) { spa_t *spa = vd->vdev_spa; if (vd->vdev_top_zap != 0) { VERIFY0(zap_add_int(spa->spa_meta_objset, avz, vd->vdev_top_zap, tx)); } if (vd->vdev_leaf_zap != 0) { VERIFY0(zap_add_int(spa->spa_meta_objset, avz, vd->vdev_leaf_zap, tx)); } for (uint64_t i = 0; i < vd->vdev_children; i++) { spa_avz_build(vd->vdev_child[i], avz, tx); } } static void spa_sync_config_object(spa_t *spa, dmu_tx_t *tx) { nvlist_t *config; /* * If the pool is being imported from a pre-per-vdev-ZAP version of ZFS, * its config may not be dirty but we still need to build per-vdev ZAPs. * Similarly, if the pool is being assembled (e.g. after a split), we * need to rebuild the AVZ although the config may not be dirty. */ if (list_is_empty(&spa->spa_config_dirty_list) && spa->spa_avz_action == AVZ_ACTION_NONE) return; spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); ASSERT(spa->spa_avz_action == AVZ_ACTION_NONE || spa->spa_avz_action == AVZ_ACTION_INITIALIZE || spa->spa_all_vdev_zaps != 0); if (spa->spa_avz_action == AVZ_ACTION_REBUILD) { /* Make and build the new AVZ */ uint64_t new_avz = zap_create(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx); spa_avz_build(spa->spa_root_vdev, new_avz, tx); /* Diff old AVZ with new one */ zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_all_vdev_zaps); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { uint64_t vdzap = za.za_first_integer; if (zap_lookup_int(spa->spa_meta_objset, new_avz, vdzap) == ENOENT) { /* * ZAP is listed in old AVZ but not in new one; * destroy it */ VERIFY0(zap_destroy(spa->spa_meta_objset, vdzap, tx)); } } zap_cursor_fini(&zc); /* Destroy the old AVZ */ VERIFY0(zap_destroy(spa->spa_meta_objset, spa->spa_all_vdev_zaps, tx)); /* Replace the old AVZ in the dir obj with the new one */ VERIFY0(zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, sizeof (new_avz), 1, &new_avz, tx)); spa->spa_all_vdev_zaps = new_avz; } else if (spa->spa_avz_action == AVZ_ACTION_DESTROY) { zap_cursor_t zc; zap_attribute_t za; /* Walk through the AVZ and destroy all listed ZAPs */ for (zap_cursor_init(&zc, spa->spa_meta_objset, spa->spa_all_vdev_zaps); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { uint64_t zap = za.za_first_integer; VERIFY0(zap_destroy(spa->spa_meta_objset, zap, tx)); } zap_cursor_fini(&zc); /* Destroy and unlink the AVZ itself */ VERIFY0(zap_destroy(spa->spa_meta_objset, spa->spa_all_vdev_zaps, tx)); VERIFY0(zap_remove(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, tx)); spa->spa_all_vdev_zaps = 0; } if (spa->spa_all_vdev_zaps == 0) { spa->spa_all_vdev_zaps = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, tx); } spa->spa_avz_action = AVZ_ACTION_NONE; /* Create ZAPs for vdevs that don't have them. */ vdev_construct_zaps(spa->spa_root_vdev, tx); config = spa_config_generate(spa, spa->spa_root_vdev, dmu_tx_get_txg(tx), B_FALSE); /* * If we're upgrading the spa version then make sure that * the config object gets updated with the correct version. 
	 */
	if (spa->spa_ubsync.ub_version < spa->spa_uberblock.ub_version)
		fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
		    spa->spa_uberblock.ub_version);

	spa_config_exit(spa, SCL_STATE, FTAG);

	nvlist_free(spa->spa_config_syncing);
	spa->spa_config_syncing = config;

	spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
}

static void
spa_sync_version(void *arg, dmu_tx_t *tx)
{
	uint64_t *versionp = arg;
	uint64_t version = *versionp;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;

	/*
	 * Setting the version is special cased when first creating the pool.
	 */
	ASSERT(tx->tx_txg != TXG_INITIAL);

	ASSERT(SPA_VERSION_IS_SUPPORTED(version));
	ASSERT(version >= spa_version(spa));

	spa->spa_uberblock.ub_version = version;
	vdev_config_dirty(spa->spa_root_vdev);
	spa_history_log_internal(spa, "set", tx, "version=%lld", version);
}

/*
 * Set zpool properties.
 */
static void
spa_sync_props(void *arg, dmu_tx_t *tx)
{
	nvlist_t *nvp = arg;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	objset_t *mos = spa->spa_meta_objset;
	nvpair_t *elem = NULL;

	mutex_enter(&spa->spa_props_lock);

	while ((elem = nvlist_next_nvpair(nvp, elem))) {
		uint64_t intval;
		char *strval, *fname;
		zpool_prop_t prop;
		const char *propname;
		zprop_type_t proptype;
		spa_feature_t fid;

		switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
		case ZPOOL_PROP_INVAL:
			/*
			 * We checked this earlier in spa_prop_validate().
			 */
			ASSERT(zpool_prop_feature(nvpair_name(elem)));

			fname = strchr(nvpair_name(elem), '@') + 1;
			VERIFY0(zfeature_lookup_name(fname, &fid));

			spa_feature_enable(spa, fid, tx);
			spa_history_log_internal(spa, "set", tx,
			    "%s=enabled", nvpair_name(elem));
			break;

		case ZPOOL_PROP_VERSION:
			intval = fnvpair_value_uint64(elem);
			/*
			 * The version is synced separately before other
			 * properties and should be correct by now.
			 */
			ASSERT3U(spa_version(spa), >=, intval);
			break;

		case ZPOOL_PROP_ALTROOT:
			/*
			 * 'altroot' is a non-persistent property. It should
			 * have been set temporarily at creation or import time.
			 */
			ASSERT(spa->spa_root != NULL);
			break;

		case ZPOOL_PROP_READONLY:
		case ZPOOL_PROP_CACHEFILE:
			/*
			 * 'readonly' and 'cachefile' are also non-persistent
			 * properties.
			 */
			break;
		case ZPOOL_PROP_COMMENT:
			strval = fnvpair_value_string(elem);
			if (spa->spa_comment != NULL)
				spa_strfree(spa->spa_comment);
			spa->spa_comment = spa_strdup(strval);
			/*
			 * We need to dirty the configuration on all the vdevs
			 * so that their labels get updated. It's unnecessary
			 * to do this for pool creation since the vdev's
			 * configuration has already been dirtied.
			 */
			if (tx->tx_txg != TXG_INITIAL)
				vdev_config_dirty(spa->spa_root_vdev);
			spa_history_log_internal(spa, "set", tx,
			    "%s=%s", nvpair_name(elem), strval);
			break;
		default:
			/*
			 * Set pool property values in the poolprops mos object.
			 */
			if (spa->spa_pool_props_object == 0) {
				spa->spa_pool_props_object =
				    zap_create_link(mos, DMU_OT_POOL_PROPS,
				    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
				    tx);
			}

			/* normalize the property name */
			propname = zpool_prop_to_name(prop);
			proptype = zpool_prop_get_type(prop);

			if (nvpair_type(elem) == DATA_TYPE_STRING) {
				ASSERT(proptype == PROP_TYPE_STRING);
				strval = fnvpair_value_string(elem);
				VERIFY0(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    1, strlen(strval) + 1, strval, tx));
				spa_history_log_internal(spa, "set", tx,
				    "%s=%s", nvpair_name(elem), strval);
			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
				intval = fnvpair_value_uint64(elem);

				if (proptype == PROP_TYPE_INDEX) {
					const char *unused;
					VERIFY0(zpool_prop_index_to_string(
					    prop, intval, &unused));
				}
				VERIFY0(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    8, 1, &intval, tx));
				spa_history_log_internal(spa, "set", tx,
				    "%s=%lld", nvpair_name(elem), intval);
			} else {
				ASSERT(0); /* not allowed */
			}

			switch (prop) {
			case ZPOOL_PROP_DELEGATION:
				spa->spa_delegation = intval;
				break;
			case ZPOOL_PROP_BOOTFS:
				spa->spa_bootfs = intval;
				break;
			case ZPOOL_PROP_FAILUREMODE:
				spa->spa_failmode = intval;
				break;
			case ZPOOL_PROP_AUTOEXPAND:
				spa->spa_autoexpand = intval;
				if (tx->tx_txg != TXG_INITIAL)
					spa_async_request(spa,
					    SPA_ASYNC_AUTOEXPAND);
				break;
			case ZPOOL_PROP_DEDUPDITTO:
				spa->spa_dedup_ditto = intval;
				break;
			default:
				break;
			}
		}
	}

	mutex_exit(&spa->spa_props_lock);
}
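/*
 * A minimal sketch (hypothetical, not part of this revision): the nvlist
 * consumed by spa_sync_props() pairs property names with values. A caller
 * builds it and hands it to spa_prop_set(), which dispatches the sync task
 * that lands here, e.g.:
 *
 *	nvlist_t *props;
 *	VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 *	VERIFY(nvlist_add_uint64(props,
 *	    zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND), 1) == 0);
 *	error = spa_prop_set(spa, props);
 *	nvlist_free(props);
 */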
/*
 * Perform one-time upgrade on-disk changes. spa_version() does not
 * reflect the new version this txg, so there must be no changes this
 * txg to anything that the upgrade code depends on after it executes.
 * Therefore this must be called after dsl_pool_sync() does the sync
 * tasks.
 */
static void
spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;

	ASSERT(spa->spa_sync_pass == 1);

	rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);

	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
		dsl_pool_create_origin(dp, tx);

		/* Keeping the origin open increases spa_minref */
		spa->spa_minref += 3;
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
		dsl_pool_upgrade_clones(dp, tx);
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) {
		dsl_pool_upgrade_dir_clones(dp, tx);

		/* Keeping the freedir open increases spa_minref */
		spa->spa_minref += 3;
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
		spa_feature_create_zap_objects(spa, tx);
	}

	/*
	 * The LZ4_COMPRESS feature's behavior was changed to
	 * activate_on_enable when the ability to use lz4 compression for
	 * metadata was added. Old pools that have this feature enabled
	 * must be upgraded to have this feature active.
	 */
	if (spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
		boolean_t lz4_en = spa_feature_is_enabled(spa,
		    SPA_FEATURE_LZ4_COMPRESS);
		boolean_t lz4_ac = spa_feature_is_active(spa,
		    SPA_FEATURE_LZ4_COMPRESS);

		if (lz4_en && !lz4_ac)
			spa_feature_incr(spa, SPA_FEATURE_LZ4_COMPRESS, tx);
	}

	/*
	 * If we haven't written the salt, do so now. Note that the
	 * feature may not be activated yet, but that's fine since
	 * the presence of this ZAP entry is backwards compatible.
*/ if (zap_contains(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT) == ENOENT) { VERIFY0(zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1, sizeof (spa->spa_cksum_salt.zcs_bytes), spa->spa_cksum_salt.zcs_bytes, tx)); } rrw_exit(&dp->dp_config_rwlock, FTAG); } static void vdev_indirect_state_sync_verify(vdev_t *vd) { vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping; vdev_indirect_births_t *vib = vd->vdev_indirect_births; if (vd->vdev_ops == &vdev_indirect_ops) { ASSERT(vim != NULL); ASSERT(vib != NULL); } if (vdev_obsolete_sm_object(vd) != 0) { ASSERT(vd->vdev_obsolete_sm != NULL); ASSERT(vd->vdev_removing || vd->vdev_ops == &vdev_indirect_ops); ASSERT(vdev_indirect_mapping_num_entries(vim) > 0); ASSERT(vdev_indirect_mapping_bytes_mapped(vim) > 0); ASSERT3U(vdev_obsolete_sm_object(vd), ==, space_map_object(vd->vdev_obsolete_sm)); ASSERT3U(vdev_indirect_mapping_bytes_mapped(vim), >=, space_map_allocated(vd->vdev_obsolete_sm)); } ASSERT(vd->vdev_obsolete_segments != NULL); /* * Since frees / remaps to an indirect vdev can only * happen in syncing context, the obsolete segments * tree must be empty when we start syncing. */ ASSERT0(range_tree_space(vd->vdev_obsolete_segments)); } /* * Sync the specified transaction group. New blocks may be dirtied as * part of the process, so we iterate until it converges. */ void spa_sync(spa_t *spa, uint64_t txg) { dsl_pool_t *dp = spa->spa_dsl_pool; objset_t *mos = spa->spa_meta_objset; bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd; dmu_tx_t *tx; int error; uint32_t max_queue_depth = zfs_vdev_async_write_max_active * zfs_vdev_queue_depth_pct / 100; VERIFY(spa_writeable(spa)); /* * Wait for i/os issued in open context that need to complete * before this txg syncs. */ (void) zio_wait(spa->spa_txg_zio[txg & TXG_MASK]); spa->spa_txg_zio[txg & TXG_MASK] = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); /* * Lock out configuration changes. */ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); spa->spa_syncing_txg = txg; spa->spa_sync_pass = 0; for (int i = 0; i < spa->spa_alloc_count; i++) { mutex_enter(&spa->spa_alloc_locks[i]); VERIFY0(avl_numnodes(&spa->spa_alloc_trees[i])); mutex_exit(&spa->spa_alloc_locks[i]); } /* * If there are any pending vdev state changes, convert them * into config changes that go out with this transaction group. */ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); while (list_head(&spa->spa_state_dirty_list) != NULL) { /* * We need the write lock here because, for aux vdevs, * calling vdev_config_dirty() modifies sav_config. * This is ugly and will become unnecessary when we * eliminate the aux vdev wart by integrating all vdevs * into the root vdev tree. 
*/ spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER); while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) { vdev_state_clean(vd); vdev_config_dirty(vd); } spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); } spa_config_exit(spa, SCL_STATE, FTAG); tx = dmu_tx_create_assigned(dp, txg); spa->spa_sync_starttime = gethrtime(); #ifdef illumos VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, spa->spa_sync_starttime + spa->spa_deadman_synctime)); #else /* !illumos */ #ifdef _KERNEL callout_schedule(&spa->spa_deadman_cycid, hz * spa->spa_deadman_synctime / NANOSEC); #endif #endif /* illumos */ /* * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg, * set spa_deflate if we have no raid-z vdevs. */ if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE && spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) { int i; for (i = 0; i < rvd->vdev_children; i++) { vd = rvd->vdev_child[i]; if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE) break; } if (i == rvd->vdev_children) { spa->spa_deflate = TRUE; VERIFY(0 == zap_add(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE, sizeof (uint64_t), 1, &spa->spa_deflate, tx)); } } /* * Set the top-level vdev's max queue depth. Evaluate each * top-level's async write queue depth in case it changed. * The max queue depth will not change in the middle of syncing * out this txg. */ uint64_t slots_per_allocator = 0; for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; metaslab_group_t *mg = tvd->vdev_mg; if (mg == NULL || mg->mg_class != spa_normal_class(spa) || !metaslab_group_initialized(mg)) continue; /* * It is safe to do a lock-free check here because only async * allocations look at mg_max_alloc_queue_depth, and async * allocations all happen from spa_sync(). */ for (int i = 0; i < spa->spa_alloc_count; i++) ASSERT0(refcount_count(&(mg->mg_alloc_queue_depth[i]))); mg->mg_max_alloc_queue_depth = max_queue_depth; for (int i = 0; i < spa->spa_alloc_count; i++) { mg->mg_cur_max_alloc_queue_depth[i] = zfs_vdev_def_queue_depth; } slots_per_allocator += zfs_vdev_def_queue_depth; } metaslab_class_t *mc = spa_normal_class(spa); for (int i = 0; i < spa->spa_alloc_count; i++) { ASSERT0(refcount_count(&mc->mc_alloc_slots[i])); mc->mc_alloc_max_slots[i] = slots_per_allocator; } mc->mc_alloc_throttle_enabled = zio_dva_throttle_enabled; for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; vdev_indirect_state_sync_verify(vd); if (vdev_indirect_should_condense(vd)) { spa_condense_indirect_start_sync(vd, tx); break; } } /* * Iterate to convergence. */ do { int pass = ++spa->spa_sync_pass; spa_sync_config_object(spa, tx); spa_sync_aux_dev(spa, &spa->spa_spares, tx, ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES); spa_sync_aux_dev(spa, &spa->spa_l2cache, tx, ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); spa_errlog_sync(spa, txg); dsl_pool_sync(dp, txg); if (pass < zfs_sync_pass_deferred_free) { spa_sync_frees(spa, free_bpl, tx); } else { /* * We can not defer frees in pass 1, because * we sync the deferred frees later in pass 1. 
*/ ASSERT3U(pass, >, 1); bplist_iterate(free_bpl, bpobj_enqueue_cb, &spa->spa_deferred_bpobj, tx); } ddt_sync(spa, txg); dsl_scan_sync(dp, tx); if (spa->spa_vdev_removal != NULL) svr_sync(spa, tx); while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) != NULL) vdev_sync(vd, txg); if (pass == 1) { spa_sync_upgrades(spa, tx); ASSERT3U(txg, >=, spa->spa_uberblock.ub_rootbp.blk_birth); /* * Note: We need to check if the MOS is dirty * because we could have marked the MOS dirty * without updating the uberblock (e.g. if we * have sync tasks but no dirty user data). We * need to check the uberblock's rootbp because * it is updated if we have synced out dirty * data (though in this case the MOS will most * likely also be dirty due to second order * effects, we don't want to rely on that here). */ if (spa->spa_uberblock.ub_rootbp.blk_birth < txg && !dmu_objset_is_dirty(mos, txg)) { /* * Nothing changed on the first pass, * therefore this TXG is a no-op. Avoid * syncing deferred frees, so that we * can keep this TXG as a no-op. */ ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg)); ASSERT(txg_list_empty(&dp->dp_early_sync_tasks, txg)); break; } spa_sync_deferred_frees(spa, tx); } } while (dmu_objset_is_dirty(mos, txg)); if (!list_is_empty(&spa->spa_config_dirty_list)) { /* * Make sure that the number of ZAPs for all the vdevs matches * the number of ZAPs in the per-vdev ZAP list. This only gets * called if the config is dirty; otherwise there may be * outstanding AVZ operations that weren't completed in * spa_sync_config_object. */ uint64_t all_vdev_zap_entry_count; ASSERT0(zap_count(spa->spa_meta_objset, spa->spa_all_vdev_zaps, &all_vdev_zap_entry_count)); ASSERT3U(vdev_count_verify_zaps(spa->spa_root_vdev), ==, all_vdev_zap_entry_count); } if (spa->spa_vdev_removal != NULL) { ASSERT0(spa->spa_vdev_removal->svr_bytes_done[txg & TXG_MASK]); } /* * Rewrite the vdev configuration (which includes the uberblock) * to commit the transaction group. * * If there are no dirty vdevs, we sync the uberblock to a few * random top-level vdevs that are known to be visible in the * config cache (see spa_vdev_add() for a complete description). * If there *are* dirty vdevs, sync the uberblock to all vdevs. */ for (;;) { /* * We hold SCL_STATE to prevent vdev open/close/etc. * while we're attempting to write the vdev labels. */ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); if (list_is_empty(&spa->spa_config_dirty_list)) { vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; int svdcount = 0; int children = rvd->vdev_children; int c0 = spa_get_random(children); for (int c = 0; c < children; c++) { vd = rvd->vdev_child[(c0 + c) % children]; /* Stop when revisiting the first vdev */ if (c > 0 && svd[0] == vd) break; if (vd->vdev_ms_array == 0 || vd->vdev_islog || !vdev_is_concrete(vd)) continue; svd[svdcount++] = vd; if (svdcount == SPA_SYNC_MIN_VDEVS) break; } error = vdev_config_sync(svd, svdcount, txg); } else { error = vdev_config_sync(rvd->vdev_child, rvd->vdev_children, txg); } if (error == 0) spa->spa_last_synced_guid = rvd->vdev_guid; spa_config_exit(spa, SCL_STATE, FTAG); if (error == 0) break; zio_suspend(spa, NULL); zio_resume_wait(spa); } dmu_tx_commit(tx); #ifdef illumos VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY)); #else /* !illumos */ #ifdef _KERNEL callout_drain(&spa->spa_deadman_cycid); #endif #endif /* illumos */ /* * Clear the dirty config list. 
*/ while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL) vdev_config_clean(vd); /* * Now that the new config has synced transactionally, * let it become visible to the config cache. */ if (spa->spa_config_syncing != NULL) { spa_config_set(spa, spa->spa_config_syncing); spa->spa_config_txg = txg; spa->spa_config_syncing = NULL; } dsl_pool_sync_done(dp, txg); for (int i = 0; i < spa->spa_alloc_count; i++) { mutex_enter(&spa->spa_alloc_locks[i]); VERIFY0(avl_numnodes(&spa->spa_alloc_trees[i])); mutex_exit(&spa->spa_alloc_locks[i]); } /* * Update usable space statistics. */ while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))) != NULL) vdev_sync_done(vd, txg); spa_update_dspace(spa); /* * It had better be the case that we didn't dirty anything * since vdev_config_sync(). */ ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg)); ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg)); ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg)); while (zfs_pause_spa_sync) delay(1); spa->spa_sync_pass = 0; /* * Update the last synced uberblock here. We want to do this at * the end of spa_sync() so that consumers of spa_last_synced_txg() * will be guaranteed that all the processing associated with * that txg has been completed. */ spa->spa_ubsync = spa->spa_uberblock; spa_config_exit(spa, SCL_CONFIG, FTAG); spa_handle_ignored_writes(spa); /* * If any async tasks have been requested, kick them off. */ spa_async_dispatch(spa); spa_async_dispatch_vd(spa); } /* * Sync all pools. We don't want to hold the namespace lock across these * operations, so we take a reference on the spa_t and drop the lock during the * sync. */ void spa_sync_allpools(void) { spa_t *spa = NULL; mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) { if (spa_state(spa) != POOL_STATE_ACTIVE || !spa_writeable(spa) || spa_suspended(spa)) continue; spa_open_ref(spa, FTAG); mutex_exit(&spa_namespace_lock); txg_wait_synced(spa_get_dsl(spa), 0); mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); } mutex_exit(&spa_namespace_lock); } /* * ========================================================================== * Miscellaneous routines * ========================================================================== */ /* * Remove all pools in the system. */ void spa_evict_all(void) { spa_t *spa; /* * Remove all cached state. All pools should be closed now, * so every spa in the AVL tree should be unreferenced. */ mutex_enter(&spa_namespace_lock); while ((spa = spa_next(NULL)) != NULL) { /* * Stop async tasks. The async thread may need to detach * a device that's been replaced, which requires grabbing * spa_namespace_lock, so we must drop it here. 
*/ spa_open_ref(spa, FTAG); mutex_exit(&spa_namespace_lock); spa_async_suspend(spa); mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); if (spa->spa_state != POOL_STATE_UNINITIALIZED) { spa_unload(spa); spa_deactivate(spa); } spa_remove(spa); } mutex_exit(&spa_namespace_lock); } vdev_t * spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux) { vdev_t *vd; int i; if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL) return (vd); if (aux) { for (i = 0; i < spa->spa_l2cache.sav_count; i++) { vd = spa->spa_l2cache.sav_vdevs[i]; if (vd->vdev_guid == guid) return (vd); } for (i = 0; i < spa->spa_spares.sav_count; i++) { vd = spa->spa_spares.sav_vdevs[i]; if (vd->vdev_guid == guid) return (vd); } } return (NULL); } void spa_upgrade(spa_t *spa, uint64_t version) { ASSERT(spa_writeable(spa)); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); /* * This should only be called for a non-faulted pool, and since a * future version would result in an unopenable pool, this shouldn't be * possible. */ ASSERT(SPA_VERSION_IS_SUPPORTED(spa->spa_uberblock.ub_version)); ASSERT3U(version, >=, spa->spa_uberblock.ub_version); spa->spa_uberblock.ub_version = version; vdev_config_dirty(spa->spa_root_vdev); spa_config_exit(spa, SCL_ALL, FTAG); txg_wait_synced(spa_get_dsl(spa), 0); } boolean_t spa_has_spare(spa_t *spa, uint64_t guid) { int i; uint64_t spareguid; spa_aux_vdev_t *sav = &spa->spa_spares; for (i = 0; i < sav->sav_count; i++) if (sav->sav_vdevs[i]->vdev_guid == guid) return (B_TRUE); for (i = 0; i < sav->sav_npending; i++) { if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID, &spareguid) == 0 && spareguid == guid) return (B_TRUE); } return (B_FALSE); } /* * Check if a pool has an active shared spare device. * Note: reference count of an active spare is 2, as a spare and as a replace */ static boolean_t spa_has_active_shared_spare(spa_t *spa) { int i, refcnt; uint64_t pool; spa_aux_vdev_t *sav = &spa->spa_spares; for (i = 0; i < sav->sav_count; i++) { if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool, &refcnt) && pool != 0ULL && pool == spa_guid(spa) && refcnt > 2) return (B_TRUE); } return (B_FALSE); } sysevent_t * spa_event_create(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, const char *name) { sysevent_t *ev = NULL; #ifdef _KERNEL sysevent_attr_list_t *attr = NULL; sysevent_value_t value; ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", SE_SLEEP); ASSERT(ev != NULL); value.value_type = SE_DATA_TYPE_STRING; value.value.sv_string = spa_name(spa); if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) goto done; value.value_type = SE_DATA_TYPE_UINT64; value.value.sv_uint64 = spa_guid(spa); if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) goto done; if (vd) { value.value_type = SE_DATA_TYPE_UINT64; value.value.sv_uint64 = vd->vdev_guid; if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, SE_SLEEP) != 0) goto done; if (vd->vdev_path) { value.value_type = SE_DATA_TYPE_STRING; value.value.sv_string = vd->vdev_path; if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, &value, SE_SLEEP) != 0) goto done; } } if (hist_nvl != NULL) { fnvlist_merge((nvlist_t *)attr, hist_nvl); } if (sysevent_attach_attributes(ev, attr) != 0) goto done; attr = NULL; done: if (attr) sysevent_free_attr(attr); #endif return (ev); } void spa_event_post(sysevent_t *ev) { #ifdef _KERNEL sysevent_id_t eid; (void) log_sysevent(ev, SE_SLEEP, &eid); sysevent_free(ev); #endif } void spa_event_discard(sysevent_t *ev) { #ifdef _KERNEL sysevent_free(ev); #endif 
}

/*
 * Post a sysevent corresponding to the given event. The 'name' must be one of
 * the event definitions in sys/sysevent/eventdefs.h. The payload will be
 * filled in from the spa and (optionally) the vdev and history nvl. This
 * doesn't do anything in the userland libzpool, as we don't want consumers to
 * misinterpret ztest or zdb as real changes.
 */
void
spa_event_notify(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, const char *name)
{
	spa_event_post(spa_event_create(spa, vd, hist_nvl, name));
}
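/*
 * A minimal usage sketch (illustrative, not part of this revision):
 * callers fire a notification in one line once the state change is
 * committed, as spa_vdev_detach() does above:
 *
 *	spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE);
 */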
Internally we check against spa_minref, but * present the image of a zero/non-zero value to consumers. * * spa_config_lock[] (per-spa array of rwlocks) * * This protects the spa_t from config changes, and must be held in * the following circumstances: * * - RW_READER to perform I/O to the spa * - RW_WRITER to change the vdev config * * The locking order is fairly straightforward: * * spa_namespace_lock -> spa_refcount * * The namespace lock must be acquired to increase the refcount from 0 * or to check if it is zero. * * spa_refcount -> spa_config_lock[] * * There must be at least one valid reference on the spa_t to acquire * the config lock. * * spa_namespace_lock -> spa_config_lock[] * * The namespace lock must always be taken before the config lock. * * * The spa_namespace_lock can be acquired directly and is globally visible. * * The namespace is manipulated using the following functions, all of which * require the spa_namespace_lock to be held. * * spa_lookup() Lookup a spa_t by name. * * spa_add() Create a new spa_t in the namespace. * * spa_remove() Remove a spa_t from the namespace. This also * frees up any memory associated with the spa_t. * * spa_next() Returns the next spa_t in the system, or the * first if NULL is passed. * * spa_evict_all() Shutdown and remove all spa_t structures in * the system. * * spa_guid_exists() Determine whether a pool/device guid exists. * * The spa_refcount is manipulated using the following functions: * * spa_open_ref() Adds a reference to the given spa_t. Must be * called with spa_namespace_lock held if the * refcount is currently zero. * * spa_close() Remove a reference from the spa_t. This will * not free the spa_t or remove it from the * namespace. No locking is required. * * spa_refcount_zero() Returns true if the refcount is currently * zero. Must be called with spa_namespace_lock * held. * * The spa_config_lock[] is an array of rwlocks, ordered as follows: * SCL_CONFIG > SCL_STATE > SCL_ALLOC > SCL_ZIO > SCL_FREE > SCL_VDEV. * spa_config_lock[] is manipulated with spa_config_{enter,exit,held}(). * * To read the configuration, it suffices to hold one of these locks as reader. * To modify the configuration, you must hold all locks as writer. To modify * vdev state without altering the vdev tree's topology (e.g. online/offline), * you must hold SCL_STATE and SCL_ZIO as writer. * * We use these distinct config locks to avoid recursive lock entry. * For example, spa_sync() (which holds SCL_CONFIG as reader) induces * block allocations (SCL_ALLOC), which may require reading space maps * from disk (dmu_read() -> zio_read() -> SCL_ZIO). * * The spa config locks cannot be normal rwlocks because we need the * ability to hand off ownership. For example, SCL_ZIO is acquired * by the issuing thread and later released by an interrupt thread. * They do, however, obey the usual write-wanted semantics to prevent * writer (i.e. system administrator) starvation. * * The lock acquisition rules are as follows: * * SCL_CONFIG * Protects changes to the vdev tree topology, such as vdev * add/remove/attach/detach. Protects the dirty config list * (spa_config_dirty_list) and the set of spares and l2arc devices. * * SCL_STATE * Protects changes to pool state and vdev state, such as vdev * online/offline/fault/degrade/clear. Protects the dirty state list * (spa_state_dirty_list) and global pool state (spa_state). * * SCL_ALLOC * Protects changes to metaslab groups and classes. * Held as reader by metaslab_alloc() and metaslab_claim(). 
* * SCL_ZIO * Held by bp-level zios (those which have no io_vd upon entry) * to prevent changes to the vdev tree. The bp-level zio implicitly * protects all of its vdev child zios, which do not hold SCL_ZIO. * * SCL_FREE * Protects changes to metaslab groups and classes. * Held as reader by metaslab_free(). SCL_FREE is distinct from * SCL_ALLOC, and lower than SCL_ZIO, so that we can safely free * blocks in zio_done() while another i/o that holds either * SCL_ALLOC or SCL_ZIO is waiting for this i/o to complete. * * SCL_VDEV * Held as reader to prevent changes to the vdev tree during trivial * inquiries such as bp_get_dsize(). SCL_VDEV is distinct from the * other locks, and lower than all of them, to ensure that it's safe * to acquire regardless of caller context. * * In addition, the following rules apply: * * (a) spa_props_lock protects pool properties, spa_config and spa_config_list. * The lock ordering is SCL_CONFIG > spa_props_lock. * * (b) I/O operations on leaf vdevs. For any zio operation that takes * an explicit vdev_t argument -- such as zio_ioctl(), zio_read_phys(), * or zio_write_phys() -- the caller must ensure that the config cannot * change in the interim, and that the vdev cannot be reopened. * SCL_STATE as reader suffices for both. * * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit(). * * spa_vdev_enter() Acquire the namespace lock and the config lock * for writing. * * spa_vdev_exit() Release the config lock, wait for all I/O * to complete, sync the updated configs to the * cache, and release the namespace lock. * * vdev state is protected by spa_vdev_state_enter() / spa_vdev_state_exit(). * Like spa_vdev_enter/exit, these are convenience wrappers -- the actual * locking is, always, based on spa_namespace_lock and spa_config_lock[]. * * spa_rename() is also implemented within this file since it requires * manipulation of the namespace. */ static avl_tree_t spa_namespace_avl; kmutex_t spa_namespace_lock; static kcondvar_t spa_namespace_cv; static int spa_active_count; int spa_max_replication_override = SPA_DVAS_PER_BP; static kmutex_t spa_spare_lock; static avl_tree_t spa_spare_avl; static kmutex_t spa_l2cache_lock; static avl_tree_t spa_l2cache_avl; kmem_cache_t *spa_buffer_pool; int spa_mode_global; #ifdef ZFS_DEBUG /* * Everything except dprintf, spa, and indirect_remap is on by default * in debug builds. */ int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_INDIRECT_REMAP); #else int zfs_flags = 0; #endif /* * zfs_recover can be set to nonzero to attempt to recover from * otherwise-fatal errors, typically caused by on-disk corruption. When * set, calls to zfs_panic_recover() will turn into warning messages. * This should only be used as a last resort, as it typically results * in leaked space, or worse. */ boolean_t zfs_recover = B_FALSE; /* * If destroy encounters an EIO while reading metadata (e.g. indirect * blocks), space referenced by the missing metadata can not be freed. * Normally this causes the background destroy to become "stalled", as * it is unable to make forward progress. While in this stalled state, * all remaining space to free from the error-encountering filesystem is * "temporarily leaked". Set this flag to cause it to ignore the EIO, * permanently leak the space from indirect blocks that can not be read, * and continue to free everything else that it can. * * The default, "stalling" behavior is useful if the storage partially * fails (i.e. some but not all i/os fail), and then later recovers.
In * this case, we will be able to continue pool operations while it is * partially failed, and when it recovers, we can continue to free the * space, with no leaks. However, note that this case is actually * fairly rare. * * Typically pools either (a) fail completely (but perhaps temporarily, * e.g. a top-level vdev going offline), or (b) have localized, * permanent errors (e.g. disk returns the wrong data due to bit flip or * firmware bug). In case (a), this setting does not matter because the * pool will be suspended and the sync thread will not be able to make * forward progress regardless. In case (b), because the error is * permanent, the best we can do is leak the minimum amount of space, * which is what setting this flag will do. Therefore, it is reasonable * for this flag to normally be set, but we chose the more conservative * approach of not setting it, so that there is no possibility of * leaking space in the "partial temporary" failure case. */ boolean_t zfs_free_leak_on_eio = B_FALSE; /* * Expiration time in milliseconds. This value has two meanings. First it is * used to determine when the spa_deadman() logic should fire. By default the * spa_deadman() will fire if spa_sync() has not completed in 1000 seconds. * Secondly, the value determines if an I/O is considered "hung". Any I/O that * has not completed in zfs_deadman_synctime_ms is considered "hung" resulting * in a system panic. */ uint64_t zfs_deadman_synctime_ms = 1000000ULL; /* * Check time in milliseconds. This defines the frequency at which we check * for hung I/O. */ uint64_t zfs_deadman_checktime_ms = 5000ULL; /* * Default value of -1 for zfs_deadman_enabled is resolved in * zfs_deadman_init() */ int zfs_deadman_enabled = -1; /* * The worst case is single-sector max-parity RAID-Z blocks, in which * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1) * times the size; so just assume that. Add to this the fact that * we can have up to 3 DVAs per bp, and one more factor of 2 because * the block may be dittoed with up to 3 DVAs by ddt_sync(). All together, * the worst case is: * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2 == 24 */ int spa_asize_inflation = 24; #if defined(__FreeBSD__) && defined(_KERNEL) SYSCTL_DECL(_vfs_zfs); SYSCTL_INT(_vfs_zfs, OID_AUTO, recover, CTLFLAG_RWTUN, &zfs_recover, 0, "Try to recover from otherwise-fatal errors."); static int sysctl_vfs_zfs_debug_flags(SYSCTL_HANDLER_ARGS) { int err, val; val = zfs_flags; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); /* * ZFS_DEBUG_MODIFY must be enabled prior to boot so all * arc buffers in the system have the necessary additional * checksum data. However, it is safe to disable at any * time. 
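 * (An assumption inferred from CTLFLAG_RWTUN above, not stated in this file:
 * vfs.zfs.debugflags is exposed both as a loader tunable and as a sysctl, so
 * ZFS_DEBUG_MODIFY can be enabled from boot via loader.conf(5) rather than
 * toggled at runtime with sysctl(8).)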
*/ if (!(zfs_flags & ZFS_DEBUG_MODIFY)) val &= ~ZFS_DEBUG_MODIFY; zfs_flags = val; return (0); } SYSCTL_PROC(_vfs_zfs, OID_AUTO, debugflags, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, 0, sizeof(int), sysctl_vfs_zfs_debug_flags, "IU", "Debug flags for ZFS testing."); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_synctime_ms, CTLFLAG_RDTUN, &zfs_deadman_synctime_ms, 0, "Stalled ZFS I/O expiration time in milliseconds"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, deadman_checktime_ms, CTLFLAG_RDTUN, &zfs_deadman_checktime_ms, 0, "Period of checks for stalled ZFS I/O in milliseconds"); SYSCTL_INT(_vfs_zfs, OID_AUTO, deadman_enabled, CTLFLAG_RDTUN, &zfs_deadman_enabled, 0, "Kernel panic on stalled ZFS I/O"); SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_asize_inflation, CTLFLAG_RWTUN, &spa_asize_inflation, 0, "Worst case inflation factor for single sector writes"); #endif #ifndef illumos #ifdef _KERNEL static void zfs_deadman_init() { /* * If we are not i386 or amd64 or in a virtual machine, * disable ZFS deadman thread by default */ if (zfs_deadman_enabled == -1) { #if defined(__amd64__) || defined(__i386__) zfs_deadman_enabled = (vm_guest == VM_GUEST_NO) ? 1 : 0; #else zfs_deadman_enabled = 0; #endif } } #endif /* _KERNEL */ #endif /* !illumos */ /* * Normally, we don't allow the last 3.2% (1/(2^spa_slop_shift)) of space in * the pool to be consumed. This ensures that we don't run the pool * completely out of space, due to unaccounted changes (e.g. to the MOS). * It also limits the worst-case time to allocate space. If we have * less than this amount of free space, most ZPL operations (e.g. write, * create) will return ENOSPC. * * Certain operations (e.g. file removal, most administrative actions) can * use half the slop space. They will only return ENOSPC if less than half * the slop space is free. Typically, once the pool has less than the slop * space free, the user will use these operations to free up space in the pool. * These are the operations that call dsl_pool_adjustedsize() with the netfree * argument set to TRUE. * * Operations that are almost guaranteed to free up space in the absence of * a pool checkpoint can use up to three quarters of the slop space * (e.g zfs destroy). * * A very restricted set of operations are always permitted, regardless of * the amount of free space. These are the operations that call * dsl_sync_task(ZFS_SPACE_CHECK_NONE). If these operations result in a net * increase in the amount of space used, it is possible to run the pool * completely out of space, causing it to be permanently read-only. * * Note that on very small pools, the slop space will be larger than * 3.2%, in an effort to have it be at least spa_min_slop (128MB), * but we never allow it to be more than half the pool size. * * See also the comments in zfs_space_check_t. */ int spa_slop_shift = 5; SYSCTL_INT(_vfs_zfs, OID_AUTO, spa_slop_shift, CTLFLAG_RWTUN, &spa_slop_shift, 0, "Shift value of reserved space (1/(2^spa_slop_shift))."); uint64_t spa_min_slop = 128 * 1024 * 1024; SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, spa_min_slop, CTLFLAG_RWTUN, &spa_min_slop, 0, "Minimal value of reserved space"); int spa_allocators = 4; /*PRINTFLIKE2*/ void spa_load_failed(spa_t *spa, const char *fmt, ...) { va_list adx; char buf[256]; va_start(adx, fmt); (void) vsnprintf(buf, sizeof (buf), fmt, adx); va_end(adx); zfs_dbgmsg("spa_load(%s, config %s): FAILED: %s", spa->spa_name, spa->spa_trust_config ? "trusted" : "untrusted", buf); } /*PRINTFLIKE2*/ void spa_load_note(spa_t *spa, const char *fmt, ...) 
{ va_list adx; char buf[256]; va_start(adx, fmt); (void) vsnprintf(buf, sizeof (buf), fmt, adx); va_end(adx); zfs_dbgmsg("spa_load(%s, config %s): %s", spa->spa_name, spa->spa_trust_config ? "trusted" : "untrusted", buf); } /* * ========================================================================== * SPA config locking * ========================================================================== */ static void spa_config_lock_init(spa_t *spa) { for (int i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL); refcount_create_untracked(&scl->scl_count); scl->scl_writer = NULL; scl->scl_write_wanted = 0; } } static void spa_config_lock_destroy(spa_t *spa) { for (int i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; mutex_destroy(&scl->scl_lock); cv_destroy(&scl->scl_cv); refcount_destroy(&scl->scl_count); ASSERT(scl->scl_writer == NULL); ASSERT(scl->scl_write_wanted == 0); } } int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw) { for (int i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; if (!(locks & (1 << i))) continue; mutex_enter(&scl->scl_lock); if (rw == RW_READER) { if (scl->scl_writer || scl->scl_write_wanted) { mutex_exit(&scl->scl_lock); spa_config_exit(spa, locks & ((1 << i) - 1), tag); return (0); } } else { ASSERT(scl->scl_writer != curthread); if (!refcount_is_zero(&scl->scl_count)) { mutex_exit(&scl->scl_lock); spa_config_exit(spa, locks & ((1 << i) - 1), tag); return (0); } scl->scl_writer = curthread; } (void) refcount_add(&scl->scl_count, tag); mutex_exit(&scl->scl_lock); } return (1); } void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw) { int wlocks_held = 0; ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY); for (int i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; if (scl->scl_writer == curthread) wlocks_held |= (1 << i); if (!(locks & (1 << i))) continue; mutex_enter(&scl->scl_lock); if (rw == RW_READER) { while (scl->scl_writer || scl->scl_write_wanted) { cv_wait(&scl->scl_cv, &scl->scl_lock); } } else { ASSERT(scl->scl_writer != curthread); while (!refcount_is_zero(&scl->scl_count)) { scl->scl_write_wanted++; cv_wait(&scl->scl_cv, &scl->scl_lock); scl->scl_write_wanted--; } scl->scl_writer = curthread; } (void) refcount_add(&scl->scl_count, tag); mutex_exit(&scl->scl_lock); } ASSERT3U(wlocks_held, <=, locks); } void spa_config_exit(spa_t *spa, int locks, void *tag) { for (int i = SCL_LOCKS - 1; i >= 0; i--) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; if (!(locks & (1 << i))) continue; mutex_enter(&scl->scl_lock); ASSERT(!refcount_is_zero(&scl->scl_count)); if (refcount_remove(&scl->scl_count, tag) == 0) { ASSERT(scl->scl_writer == NULL || scl->scl_writer == curthread); scl->scl_writer = NULL; /* OK in either case */ cv_broadcast(&scl->scl_cv); } mutex_exit(&scl->scl_lock); } } int spa_config_held(spa_t *spa, int locks, krw_t rw) { int locks_held = 0; for (int i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; if (!(locks & (1 << i))) continue; if ((rw == RW_READER && !refcount_is_zero(&scl->scl_count)) || (rw == RW_WRITER && scl->scl_writer == curthread)) locks_held |= 1 << i; } return (locks_held); } /* * ========================================================================== * SPA namespace functions * ========================================================================== */ 
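/*
 * Illustrative sketch, not part of the original file: a minimal reader-side
 * use of the config-lock API above, assuming the caller already holds a
 * reference on the spa_t. SCL_VDEV as reader is the lowest-ordered of these
 * locks and suffices for trivial vdev-tree inquiries; bp_get_dsize() later
 * in this file uses the same pattern.
 *
 *	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 *	vd = vdev_lookup_top(spa, vdevid);
 *	spa_config_exit(spa, SCL_VDEV, FTAG);
 */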
/* * Lookup the named spa_t in the AVL tree. The spa_namespace_lock must be held. * Returns NULL if no matching spa_t is found. */ spa_t * spa_lookup(const char *name) { static spa_t search; /* spa_t is large; don't allocate on stack */ spa_t *spa; avl_index_t where; char *cp; ASSERT(MUTEX_HELD(&spa_namespace_lock)); (void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); /* * If it's a full dataset name, figure out the pool name and * just use that. */ cp = strpbrk(search.spa_name, "/@#"); if (cp != NULL) *cp = '\0'; spa = avl_find(&spa_namespace_avl, &search, &where); return (spa); } /* * Fires when spa_sync has not completed within zfs_deadman_synctime_ms. * If the zfs_deadman_enabled flag is set then it inspects all vdev queues * looking for potentially hung I/Os. */ static void spa_deadman(void *arg, int pending) { spa_t *spa = arg; /* * Disable the deadman timer if the pool is suspended. */ if (spa_suspended(spa)) { #ifdef illumos VERIFY(cyclic_reprogram(spa->spa_deadman_cycid, CY_INFINITY)); #else /* Nothing. just don't schedule any future callouts. */ #endif return; } zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu", (gethrtime() - spa->spa_sync_starttime) / NANOSEC, ++spa->spa_deadman_calls); if (zfs_deadman_enabled) vdev_deadman(spa->spa_root_vdev); #ifdef __FreeBSD__ #ifdef _KERNEL callout_schedule(&spa->spa_deadman_cycid, hz * zfs_deadman_checktime_ms / MILLISEC); #endif #endif } #if defined(__FreeBSD__) && defined(_KERNEL) static void spa_deadman_timeout(void *arg) { spa_t *spa = arg; taskqueue_enqueue(taskqueue_thread, &spa->spa_deadman_task); } #endif /* * Create an uninitialized spa_t with the given name. Requires * spa_namespace_lock. The caller must ensure that the spa_t doesn't already * exist by calling spa_lookup() first. 
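 *
 * A minimal usage sketch (hypothetical caller, shown only to tie
 * spa_lookup() and spa_add() together under the required lock):
 *
 *	spa_t *spa;
 *
 *	mutex_enter(&spa_namespace_lock);
 *	if (spa_lookup(name) == NULL)
 *		spa = spa_add(name, NULL, NULL);
 *	mutex_exit(&spa_namespace_lock);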
*/ spa_t * spa_add(const char *name, nvlist_t *config, const char *altroot) { spa_t *spa; spa_config_dirent_t *dp; #ifdef illumos cyc_handler_t hdlr; cyc_time_t when; #endif ASSERT(MUTEX_HELD(&spa_namespace_lock)); spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP); mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_evicting_os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_cksum_tmpls_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL); for (int t = 0; t < TXG_SIZE; t++) bplist_create(&spa->spa_free_bplist[t]); (void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name)); spa->spa_state = POOL_STATE_UNINITIALIZED; spa->spa_freeze_txg = UINT64_MAX; spa->spa_final_txg = UINT64_MAX; spa->spa_load_max_txg = UINT64_MAX; spa->spa_proc = &p0; spa->spa_proc_state = SPA_PROC_NONE; spa->spa_trust_config = B_TRUE; #ifdef illumos hdlr.cyh_func = spa_deadman; hdlr.cyh_arg = spa; hdlr.cyh_level = CY_LOW_LEVEL; #endif spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms); #ifdef illumos /* * This determines how often we need to check for hung I/Os after * the cyclic has already fired. Since checking for hung I/Os is * an expensive operation we don't want to check too frequently. * Instead wait for 5 seconds before checking again. */ when.cyt_interval = MSEC2NSEC(zfs_deadman_checktime_ms); when.cyt_when = CY_INFINITY; mutex_enter(&cpu_lock); spa->spa_deadman_cycid = cyclic_add(&hdlr, &when); mutex_exit(&cpu_lock); #else /* !illumos */ #ifdef _KERNEL /* * callout(9) does not provide a way to initialize a callout with * a function and an argument, so we use callout_reset() to schedule * the callout in the very distant future. Even if that event ever * fires, it should be okay as we won't have any active zio-s. * But normally spa_sync() will reschedule the callout with a proper * timeout. * callout(9) does not allow the callback function to sleep but * vdev_deadman() needs to acquire vq_lock and illumos mutexes are * emulated using sx(9). For this reason spa_deadman_timeout() * will schedule spa_deadman() as a task on a taskqueue that allows * sleeping. */ TASK_INIT(&spa->spa_deadman_task, 0, spa_deadman, spa); callout_init(&spa->spa_deadman_cycid, 1); callout_reset_sbt(&spa->spa_deadman_cycid, SBT_MAX, 0, spa_deadman_timeout, spa, 0); #endif #endif refcount_create(&spa->spa_refcount); spa_config_lock_init(spa); avl_add(&spa_namespace_avl, spa); /* * Set the alternate root, if there is one.
*/ if (altroot) { spa->spa_root = spa_strdup(altroot); spa_active_count++; } spa->spa_alloc_count = spa_allocators; spa->spa_alloc_locks = kmem_zalloc(spa->spa_alloc_count * sizeof (kmutex_t), KM_SLEEP); spa->spa_alloc_trees = kmem_zalloc(spa->spa_alloc_count * sizeof (avl_tree_t), KM_SLEEP); for (int i = 0; i < spa->spa_alloc_count; i++) { mutex_init(&spa->spa_alloc_locks[i], NULL, MUTEX_DEFAULT, NULL); avl_create(&spa->spa_alloc_trees[i], zio_bookmark_compare, sizeof (zio_t), offsetof(zio_t, io_alloc_node)); } /* * Every pool starts with the default cachefile */ list_create(&spa->spa_config_list, sizeof (spa_config_dirent_t), offsetof(spa_config_dirent_t, scd_link)); dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_SLEEP); dp->scd_path = altroot ? NULL : spa_strdup(spa_config_path); list_insert_head(&spa->spa_config_list, dp); VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (config != NULL) { nvlist_t *features; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, &features) == 0) { VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); } VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); } if (spa->spa_label_features == NULL) { VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME, KM_SLEEP) == 0); } spa->spa_min_ashift = INT_MAX; spa->spa_max_ashift = 0; /* * As a pool is being created, treat all features as disabled by * setting SPA_FEATURE_DISABLED for all entries in the feature * refcount cache. */ for (int i = 0; i < SPA_FEATURES; i++) { spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED; } return (spa); } /* * Removes a spa_t from the namespace, freeing up any memory used. Requires * spa_namespace_lock. This is called only after the spa_t has been closed and * deactivated. */ void spa_remove(spa_t *spa) { spa_config_dirent_t *dp; ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); ASSERT3U(refcount_count(&spa->spa_refcount), ==, 0); nvlist_free(spa->spa_config_splitting); avl_remove(&spa_namespace_avl, spa); cv_broadcast(&spa_namespace_cv); if (spa->spa_root) { spa_strfree(spa->spa_root); spa_active_count--; } while ((dp = list_head(&spa->spa_config_list)) != NULL) { list_remove(&spa->spa_config_list, dp); if (dp->scd_path != NULL) spa_strfree(dp->scd_path); kmem_free(dp, sizeof (spa_config_dirent_t)); } for (int i = 0; i < spa->spa_alloc_count; i++) { avl_destroy(&spa->spa_alloc_trees[i]); mutex_destroy(&spa->spa_alloc_locks[i]); } kmem_free(spa->spa_alloc_locks, spa->spa_alloc_count * sizeof (kmutex_t)); kmem_free(spa->spa_alloc_trees, spa->spa_alloc_count * sizeof (avl_tree_t)); list_destroy(&spa->spa_config_list); nvlist_free(spa->spa_label_features); nvlist_free(spa->spa_load_info); + nvlist_free(spa->spa_feat_stats); spa_config_set(spa, NULL); #ifdef illumos mutex_enter(&cpu_lock); if (spa->spa_deadman_cycid != CYCLIC_NONE) cyclic_remove(spa->spa_deadman_cycid); mutex_exit(&cpu_lock); spa->spa_deadman_cycid = CYCLIC_NONE; #else /* !illumos */ #ifdef _KERNEL callout_drain(&spa->spa_deadman_cycid); taskqueue_drain(taskqueue_thread, &spa->spa_deadman_task); #endif #endif refcount_destroy(&spa->spa_refcount); spa_config_lock_destroy(spa); for (int t = 0; t < TXG_SIZE; t++) bplist_destroy(&spa->spa_free_bplist[t]); zio_checksum_templates_free(spa); cv_destroy(&spa->spa_async_cv); cv_destroy(&spa->spa_evicting_os_cv); cv_destroy(&spa->spa_proc_cv); cv_destroy(&spa->spa_scrub_io_cv); cv_destroy(&spa->spa_suspend_cv); mutex_destroy(&spa->spa_async_lock); 
mutex_destroy(&spa->spa_errlist_lock); mutex_destroy(&spa->spa_errlog_lock); mutex_destroy(&spa->spa_evicting_os_lock); mutex_destroy(&spa->spa_history_lock); mutex_destroy(&spa->spa_proc_lock); mutex_destroy(&spa->spa_props_lock); mutex_destroy(&spa->spa_cksum_tmpls_lock); mutex_destroy(&spa->spa_scrub_lock); mutex_destroy(&spa->spa_suspend_lock); mutex_destroy(&spa->spa_vdev_top_lock); + mutex_destroy(&spa->spa_feat_stats_lock); kmem_free(spa, sizeof (spa_t)); } /* * Given a pool, return the next pool in the namespace, or NULL if there is * none. If 'prev' is NULL, return the first pool. */ spa_t * spa_next(spa_t *prev) { ASSERT(MUTEX_HELD(&spa_namespace_lock)); if (prev) return (AVL_NEXT(&spa_namespace_avl, prev)); else return (avl_first(&spa_namespace_avl)); } /* * ========================================================================== * SPA refcount functions * ========================================================================== */ /* * Add a reference to the given spa_t. Must have at least one reference, or * have the namespace lock held. */ void spa_open_ref(spa_t *spa, void *tag) { ASSERT(refcount_count(&spa->spa_refcount) >= spa->spa_minref || MUTEX_HELD(&spa_namespace_lock)); (void) refcount_add(&spa->spa_refcount, tag); } /* * Remove a reference to the given spa_t. Must have at least one reference, or * have the namespace lock held. */ void spa_close(spa_t *spa, void *tag) { ASSERT(refcount_count(&spa->spa_refcount) > spa->spa_minref || MUTEX_HELD(&spa_namespace_lock)); (void) refcount_remove(&spa->spa_refcount, tag); } /* * Remove a reference to the given spa_t held by a dsl dir that is * being asynchronously released. Async releases occur from a taskq * performing eviction of dsl datasets and dirs. The namespace lock * isn't held and the hold by the object being evicted may contribute to * spa_minref (e.g. dataset or directory released during pool export), * so the asserts in spa_close() do not apply. */ void spa_async_close(spa_t *spa, void *tag) { (void) refcount_remove(&spa->spa_refcount, tag); } /* * Check to see if the spa refcount is zero. Must be called with * spa_namespace_lock held. We really compare against spa_minref, which is the * number of references acquired when opening a pool */ boolean_t spa_refcount_zero(spa_t *spa) { ASSERT(MUTEX_HELD(&spa_namespace_lock)); return (refcount_count(&spa->spa_refcount) == spa->spa_minref); } /* * ========================================================================== * SPA spare and l2cache tracking * ========================================================================== */ /* * Hot spares and cache devices are tracked using the same code below, * for 'auxiliary' devices. 
*/ typedef struct spa_aux { uint64_t aux_guid; uint64_t aux_pool; avl_node_t aux_avl; int aux_count; } spa_aux_t; static inline int spa_aux_compare(const void *a, const void *b) { const spa_aux_t *sa = (const spa_aux_t *)a; const spa_aux_t *sb = (const spa_aux_t *)b; return (AVL_CMP(sa->aux_guid, sb->aux_guid)); } void spa_aux_add(vdev_t *vd, avl_tree_t *avl) { avl_index_t where; spa_aux_t search; spa_aux_t *aux; search.aux_guid = vd->vdev_guid; if ((aux = avl_find(avl, &search, &where)) != NULL) { aux->aux_count++; } else { aux = kmem_zalloc(sizeof (spa_aux_t), KM_SLEEP); aux->aux_guid = vd->vdev_guid; aux->aux_count = 1; avl_insert(avl, aux, where); } } void spa_aux_remove(vdev_t *vd, avl_tree_t *avl) { spa_aux_t search; spa_aux_t *aux; avl_index_t where; search.aux_guid = vd->vdev_guid; aux = avl_find(avl, &search, &where); ASSERT(aux != NULL); if (--aux->aux_count == 0) { avl_remove(avl, aux); kmem_free(aux, sizeof (spa_aux_t)); } else if (aux->aux_pool == spa_guid(vd->vdev_spa)) { aux->aux_pool = 0ULL; } } boolean_t spa_aux_exists(uint64_t guid, uint64_t *pool, int *refcnt, avl_tree_t *avl) { spa_aux_t search, *found; search.aux_guid = guid; found = avl_find(avl, &search, NULL); if (pool) { if (found) *pool = found->aux_pool; else *pool = 0ULL; } if (refcnt) { if (found) *refcnt = found->aux_count; else *refcnt = 0; } return (found != NULL); } void spa_aux_activate(vdev_t *vd, avl_tree_t *avl) { spa_aux_t search, *found; avl_index_t where; search.aux_guid = vd->vdev_guid; found = avl_find(avl, &search, &where); ASSERT(found != NULL); ASSERT(found->aux_pool == 0ULL); found->aux_pool = spa_guid(vd->vdev_spa); } /* * Spares are tracked globally due to the following constraints: * * - A spare may be part of multiple pools. * - A spare may be added to a pool even if it's actively in use within * another pool. * - A spare in use in any pool can only be the source of a replacement if * the target is a spare in the same pool. * * We keep track of all spares on the system through the use of a reference * counted AVL tree. When a vdev is added as a spare, or used as a replacement * spare, we bump the reference count in the AVL tree. In addition, we set * the 'vdev_isspare' member to indicate that the device is a spare (active or * inactive). When a spare is made active (used to replace a device in the * pool), we also keep track of which pool it's been made a part of. * * The 'spa_spare_lock' protects the AVL tree. These functions are normally * called under the spa_namespace lock as part of vdev reconfiguration. The * separate spare lock exists for the status query path, which does not need to * be completely consistent with respect to other vdev configuration changes.
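 *
 * For example (an inference from spa_has_active_shared_spare() earlier in
 * this file, which tests for refcnt > 2): a spare actively replacing a
 * device in its own pool carries an aux reference count of 2 -- one as a
 * spare and one as the replacing vdev -- so a count above 2 implies the
 * spare is also in use by another pool.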
*/ static int spa_spare_compare(const void *a, const void *b) { return (spa_aux_compare(a, b)); } void spa_spare_add(vdev_t *vd) { mutex_enter(&spa_spare_lock); ASSERT(!vd->vdev_isspare); spa_aux_add(vd, &spa_spare_avl); vd->vdev_isspare = B_TRUE; mutex_exit(&spa_spare_lock); } void spa_spare_remove(vdev_t *vd) { mutex_enter(&spa_spare_lock); ASSERT(vd->vdev_isspare); spa_aux_remove(vd, &spa_spare_avl); vd->vdev_isspare = B_FALSE; mutex_exit(&spa_spare_lock); } boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt) { boolean_t found; mutex_enter(&spa_spare_lock); found = spa_aux_exists(guid, pool, refcnt, &spa_spare_avl); mutex_exit(&spa_spare_lock); return (found); } void spa_spare_activate(vdev_t *vd) { mutex_enter(&spa_spare_lock); ASSERT(vd->vdev_isspare); spa_aux_activate(vd, &spa_spare_avl); mutex_exit(&spa_spare_lock); } /* * Level 2 ARC devices are tracked globally for the same reasons as spares. * Cache devices currently only support one pool per cache device, and so * for these devices the aux reference count is currently unused beyond 1. */ static int spa_l2cache_compare(const void *a, const void *b) { return (spa_aux_compare(a, b)); } void spa_l2cache_add(vdev_t *vd) { mutex_enter(&spa_l2cache_lock); ASSERT(!vd->vdev_isl2cache); spa_aux_add(vd, &spa_l2cache_avl); vd->vdev_isl2cache = B_TRUE; mutex_exit(&spa_l2cache_lock); } void spa_l2cache_remove(vdev_t *vd) { mutex_enter(&spa_l2cache_lock); ASSERT(vd->vdev_isl2cache); spa_aux_remove(vd, &spa_l2cache_avl); vd->vdev_isl2cache = B_FALSE; mutex_exit(&spa_l2cache_lock); } boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool) { boolean_t found; mutex_enter(&spa_l2cache_lock); found = spa_aux_exists(guid, pool, NULL, &spa_l2cache_avl); mutex_exit(&spa_l2cache_lock); return (found); } void spa_l2cache_activate(vdev_t *vd) { mutex_enter(&spa_l2cache_lock); ASSERT(vd->vdev_isl2cache); spa_aux_activate(vd, &spa_l2cache_avl); mutex_exit(&spa_l2cache_lock); } /* * ========================================================================== * SPA vdev locking * ========================================================================== */ /* * Lock the given spa_t for the purpose of adding or removing a vdev. * Grabs the global spa_namespace_lock plus the spa config lock for writing. * It returns the next transaction group for the spa_t. */ uint64_t spa_vdev_enter(spa_t *spa) { mutex_enter(&spa->spa_vdev_top_lock); mutex_enter(&spa_namespace_lock); return (spa_vdev_config_enter(spa)); } /* * Internal implementation for spa_vdev_enter(). Used when a vdev * operation requires multiple syncs (i.e. removing a device) while * keeping the spa_namespace_lock held. */ uint64_t spa_vdev_config_enter(spa_t *spa) { ASSERT(MUTEX_HELD(&spa_namespace_lock)); spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); return (spa_last_synced_txg(spa) + 1); } /* * Used in combination with spa_vdev_config_enter() to allow the syncing * of multiple transactions without releasing the spa_namespace_lock. */ void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag) { ASSERT(MUTEX_HELD(&spa_namespace_lock)); int config_changed = B_FALSE; ASSERT(txg > spa_last_synced_txg(spa)); spa->spa_pending_vdev = NULL; /* * Reassess the DTLs. */ vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE); if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) { config_changed = B_TRUE; spa->spa_config_generation++; } /* * Verify the metaslab classes. 
*/ ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0); ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0); spa_config_exit(spa, SCL_ALL, spa); /* * Panic the system if the specified tag requires it. This * is useful for ensuring that configurations are updated * transactionally. */ if (zio_injection_enabled) zio_handle_panic_injection(spa, tag, 0); /* * Note: this txg_wait_synced() is important because it ensures * that there won't be more than one config change per txg. * This allows us to use the txg as the generation number. */ if (error == 0) txg_wait_synced(spa->spa_dsl_pool, txg); if (vd != NULL) { ASSERT(!vd->vdev_detached || vd->vdev_dtl_sm == NULL); if (vd->vdev_ops->vdev_op_leaf) { mutex_enter(&vd->vdev_initialize_lock); vdev_initialize_stop(vd, VDEV_INITIALIZE_CANCELED); mutex_exit(&vd->vdev_initialize_lock); } spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); vdev_free(vd); spa_config_exit(spa, SCL_ALL, spa); } /* * If the config changed, update the config cache. */ if (config_changed) spa_write_cachefile(spa, B_FALSE, B_TRUE); } /* * Unlock the spa_t after adding or removing a vdev. Besides undoing the * locking of spa_vdev_enter(), we also want to make sure the transactions have * synced to disk, and then update the global configuration cache with the new * information. */ int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error) { spa_vdev_config_exit(spa, vd, txg, error, FTAG); mutex_exit(&spa_namespace_lock); mutex_exit(&spa->spa_vdev_top_lock); return (error); } /* * Lock the given spa_t for the purpose of changing vdev state. */ void spa_vdev_state_enter(spa_t *spa, int oplocks) { int locks = SCL_STATE_ALL | oplocks; /* * Root pools may need to read from the underlying devfs filesystem * when opening up a vdev. Unfortunately if we're holding the * SCL_ZIO lock it will result in a deadlock when we try to issue * the read from the root filesystem. Instead we "prefetch" * the associated vnodes that we need prior to opening the * underlying devices and cache them so that we can prevent * any I/O when we are doing the actual open. */ if (spa_is_root(spa)) { int low = locks & ~(SCL_ZIO - 1); int high = locks & ~low; spa_config_enter(spa, high, spa, RW_WRITER); vdev_hold(spa->spa_root_vdev); spa_config_enter(spa, low, spa, RW_WRITER); } else { spa_config_enter(spa, locks, spa, RW_WRITER); } spa->spa_vdev_locks = locks; } int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error) { boolean_t config_changed = B_FALSE; if (vd != NULL || error == 0) vdev_dtl_reassess(vd ? vd->vdev_top : spa->spa_root_vdev, 0, 0, B_FALSE); if (vd != NULL) { vdev_state_dirty(vd->vdev_top); config_changed = B_TRUE; spa->spa_config_generation++; } if (spa_is_root(spa)) vdev_rele(spa->spa_root_vdev); ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL); spa_config_exit(spa, spa->spa_vdev_locks, spa); /* * If anything changed, wait for it to sync. This ensures that, * from the system administrator's perspective, zpool(1M) commands * are synchronous. This is important for things like zpool offline: * when the command completes, you expect no further I/O from ZFS. */ if (vd != NULL) txg_wait_synced(spa->spa_dsl_pool, 0); /* * If the config changed, update the config cache.
*/ if (config_changed) { mutex_enter(&spa_namespace_lock); spa_write_cachefile(spa, B_FALSE, B_TRUE); mutex_exit(&spa_namespace_lock); } return (error); } /* * ========================================================================== * Miscellaneous functions * ========================================================================== */ void spa_activate_mos_feature(spa_t *spa, const char *feature, dmu_tx_t *tx) { if (!nvlist_exists(spa->spa_label_features, feature)) { fnvlist_add_boolean(spa->spa_label_features, feature); /* * When we are creating the pool (tx_txg==TXG_INITIAL), we can't * dirty the vdev config because lock SCL_CONFIG is not held. * Thankfully, in this case we don't need to dirty the config * because it will be written out anyway when we finish * creating the pool. */ if (tx->tx_txg != TXG_INITIAL) vdev_config_dirty(spa->spa_root_vdev); } } void spa_deactivate_mos_feature(spa_t *spa, const char *feature) { if (nvlist_remove_all(spa->spa_label_features, feature) == 0) vdev_config_dirty(spa->spa_root_vdev); } /* * Rename a spa_t. */ int spa_rename(const char *name, const char *newname) { spa_t *spa; int err; /* * Lookup the spa_t and grab the config lock for writing. We need to * actually open the pool so that we can sync out the necessary labels. * It's OK to call spa_open() with the namespace lock held because we * allow recursive calls for other reasons. */ mutex_enter(&spa_namespace_lock); if ((err = spa_open(name, &spa, FTAG)) != 0) { mutex_exit(&spa_namespace_lock); return (err); } spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); avl_remove(&spa_namespace_avl, spa); (void) strlcpy(spa->spa_name, newname, sizeof (spa->spa_name)); avl_add(&spa_namespace_avl, spa); /* * Sync all labels to disk with the new names by marking the root vdev * dirty and waiting for it to sync. It will pick up the new pool name * during the sync. */ vdev_config_dirty(spa->spa_root_vdev); spa_config_exit(spa, SCL_ALL, FTAG); txg_wait_synced(spa->spa_dsl_pool, 0); /* * Sync the updated config cache. */ spa_write_cachefile(spa, B_FALSE, B_TRUE); spa_close(spa, FTAG); mutex_exit(&spa_namespace_lock); return (0); } /* * Return the spa_t associated with given pool_guid, if it exists. If * device_guid is non-zero, determine whether the pool exists *and* contains * a device with the specified device_guid. */ spa_t * spa_by_guid(uint64_t pool_guid, uint64_t device_guid) { spa_t *spa; avl_tree_t *t = &spa_namespace_avl; ASSERT(MUTEX_HELD(&spa_namespace_lock)); for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) { if (spa->spa_state == POOL_STATE_UNINITIALIZED) continue; if (spa->spa_root_vdev == NULL) continue; if (spa_guid(spa) == pool_guid) { if (device_guid == 0) break; if (vdev_lookup_by_guid(spa->spa_root_vdev, device_guid) != NULL) break; /* * Check any devices we may be in the process of adding. */ if (spa->spa_pending_vdev) { if (vdev_lookup_by_guid(spa->spa_pending_vdev, device_guid) != NULL) break; } } } return (spa); } /* * Determine whether a pool with the given pool_guid exists. 
*/ boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid) { return (spa_by_guid(pool_guid, device_guid) != NULL); } char * spa_strdup(const char *s) { size_t len; char *new; len = strlen(s); new = kmem_alloc(len + 1, KM_SLEEP); bcopy(s, new, len); new[len] = '\0'; return (new); } void spa_strfree(char *s) { kmem_free(s, strlen(s) + 1); } uint64_t spa_get_random(uint64_t range) { uint64_t r; ASSERT(range != 0); (void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t)); return (r % range); } uint64_t spa_generate_guid(spa_t *spa) { uint64_t guid = spa_get_random(-1ULL); if (spa != NULL) { while (guid == 0 || spa_guid_exists(spa_guid(spa), guid)) guid = spa_get_random(-1ULL); } else { while (guid == 0 || spa_guid_exists(guid, 0)) guid = spa_get_random(-1ULL); } return (guid); } void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp) { char type[256]; char *checksum = NULL; char *compress = NULL; if (bp != NULL) { if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) { dmu_object_byteswap_t bswap = DMU_OT_BYTESWAP(BP_GET_TYPE(bp)); (void) snprintf(type, sizeof (type), "bswap %s %s", DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ? "metadata" : "data", dmu_ot_byteswap[bswap].ob_name); } else { (void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name, sizeof (type)); } if (!BP_IS_EMBEDDED(bp)) { checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name; } compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name; } SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum, compress); } void spa_freeze(spa_t *spa) { uint64_t freeze_txg = 0; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); if (spa->spa_freeze_txg == UINT64_MAX) { freeze_txg = spa_last_synced_txg(spa) + TXG_SIZE; spa->spa_freeze_txg = freeze_txg; } spa_config_exit(spa, SCL_ALL, FTAG); if (freeze_txg != 0) txg_wait_synced(spa_get_dsl(spa), freeze_txg); } void zfs_panic_recover(const char *fmt, ...) { va_list adx; va_start(adx, fmt); vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx); va_end(adx); } /* * This is a stripped-down version of strtoull, suitable only for converting * lowercase hexadecimal numbers that don't overflow. 
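 *
 * For example, zfs_strtonum("1a", NULL) returns 26; parsing stops at the
 * first character that is not 0-9 or lowercase a-f, and when nptr is
 * non-NULL the stop position is reported back through *nptr.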
*/ uint64_t zfs_strtonum(const char *str, char **nptr) { uint64_t val = 0; char c; int digit; while ((c = *str) != '\0') { if (c >= '0' && c <= '9') digit = c - '0'; else if (c >= 'a' && c <= 'f') digit = 10 + c - 'a'; else break; val *= 16; val += digit; str++; } if (nptr) *nptr = (char *)str; return (val); } /* * ========================================================================== * Accessor functions * ========================================================================== */ boolean_t spa_shutting_down(spa_t *spa) { return (spa->spa_async_suspended); } dsl_pool_t * spa_get_dsl(spa_t *spa) { return (spa->spa_dsl_pool); } boolean_t spa_is_initializing(spa_t *spa) { return (spa->spa_is_initializing); } boolean_t spa_indirect_vdevs_loaded(spa_t *spa) { return (spa->spa_indirect_vdevs_loaded); } blkptr_t * spa_get_rootblkptr(spa_t *spa) { return (&spa->spa_ubsync.ub_rootbp); } void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp) { spa->spa_uberblock.ub_rootbp = *bp; } void spa_altroot(spa_t *spa, char *buf, size_t buflen) { if (spa->spa_root == NULL) buf[0] = '\0'; else (void) strncpy(buf, spa->spa_root, buflen); } int spa_sync_pass(spa_t *spa) { return (spa->spa_sync_pass); } char * spa_name(spa_t *spa) { return (spa->spa_name); } uint64_t spa_guid(spa_t *spa) { dsl_pool_t *dp = spa_get_dsl(spa); uint64_t guid; /* * If we fail to parse the config during spa_load(), we can go through * the error path (which posts an ereport) and end up here with no root * vdev. We stash the original pool guid in 'spa_config_guid' to handle * this case. */ if (spa->spa_root_vdev == NULL) return (spa->spa_config_guid); guid = spa->spa_last_synced_guid != 0 ? spa->spa_last_synced_guid : spa->spa_root_vdev->vdev_guid; /* * Return the most recently synced out guid unless we're * in syncing context. */ if (dp && dsl_pool_sync_context(dp)) return (spa->spa_root_vdev->vdev_guid); else return (guid); } uint64_t spa_load_guid(spa_t *spa) { /* * This is a GUID that exists solely as a reference for the * purposes of the arc. It is generated at load time, and * is never written to persistent storage. */ return (spa->spa_load_guid); } uint64_t spa_last_synced_txg(spa_t *spa) { return (spa->spa_ubsync.ub_txg); } uint64_t spa_first_txg(spa_t *spa) { return (spa->spa_first_txg); } uint64_t spa_syncing_txg(spa_t *spa) { return (spa->spa_syncing_txg); } /* * Return the last txg where data can be dirtied. The final txgs * will be used to just clear out any deferred frees that remain. */ uint64_t spa_final_dirty_txg(spa_t *spa) { return (spa->spa_final_txg - TXG_DEFER_SIZE); } pool_state_t spa_state(spa_t *spa) { return (spa->spa_state); } spa_load_state_t spa_load_state(spa_t *spa) { return (spa->spa_load_state); } uint64_t spa_freeze_txg(spa_t *spa) { return (spa->spa_freeze_txg); } /* ARGSUSED */ uint64_t spa_get_worst_case_asize(spa_t *spa, uint64_t lsize) { return (lsize * spa_asize_inflation); } /* * Return the amount of slop space in bytes. It is 1/32 of the pool (3.2%), * or at least 128MB, unless that would cause it to be more than half the * pool size. * * See the comment above spa_slop_shift for details. 
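 *
 * A worked example with the defaults in this file (spa_slop_shift == 5,
 * spa_min_slop == 128 MiB): for a 1 TiB pool, space >> 5 is 32 GiB, which
 * wins the MAX() against the 128 MiB floor; for a 1 GiB pool, space >> 5
 * is only 32 MiB, so the 128 MiB floor applies; and for a pool smaller
 * than 256 MiB, MIN(space >> 1, spa_min_slop) caps the reservation at
 * half the pool.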
*/ uint64_t spa_get_slop_space(spa_t *spa) { uint64_t space = spa_get_dspace(spa); return (MAX(space >> spa_slop_shift, MIN(space >> 1, spa_min_slop))); } uint64_t spa_get_dspace(spa_t *spa) { return (spa->spa_dspace); } uint64_t spa_get_checkpoint_space(spa_t *spa) { return (spa->spa_checkpoint_info.sci_dspace); } void spa_update_dspace(spa_t *spa) { spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) + ddt_get_dedup_dspace(spa); if (spa->spa_vdev_removal != NULL) { /* * We can't allocate from the removing device, so * subtract its size. This prevents the DMU/DSL from * filling up the (now smaller) pool while we are in the * middle of removing the device. * * Note that the DMU/DSL doesn't actually know or care * how much space is allocated (it does its own tracking * of how much space has been logically used). So it * doesn't matter that the data we are moving may be * allocated twice (on the old device and the new * device). */ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); vdev_t *vd = vdev_lookup_top(spa, spa->spa_vdev_removal->svr_vdev_id); spa->spa_dspace -= spa_deflate(spa) ? vd->vdev_stat.vs_dspace : vd->vdev_stat.vs_space; spa_config_exit(spa, SCL_VDEV, FTAG); } } /* * Return the failure mode that has been set to this pool. The default * behavior will be to block all I/Os when a complete failure occurs. */ uint8_t spa_get_failmode(spa_t *spa) { return (spa->spa_failmode); } boolean_t spa_suspended(spa_t *spa) { return (spa->spa_suspended); } uint64_t spa_version(spa_t *spa) { return (spa->spa_ubsync.ub_version); } boolean_t spa_deflate(spa_t *spa) { return (spa->spa_deflate); } metaslab_class_t * spa_normal_class(spa_t *spa) { return (spa->spa_normal_class); } metaslab_class_t * spa_log_class(spa_t *spa) { return (spa->spa_log_class); } void spa_evicting_os_register(spa_t *spa, objset_t *os) { mutex_enter(&spa->spa_evicting_os_lock); list_insert_head(&spa->spa_evicting_os_list, os); mutex_exit(&spa->spa_evicting_os_lock); } void spa_evicting_os_deregister(spa_t *spa, objset_t *os) { mutex_enter(&spa->spa_evicting_os_lock); list_remove(&spa->spa_evicting_os_list, os); cv_broadcast(&spa->spa_evicting_os_cv); mutex_exit(&spa->spa_evicting_os_lock); } void spa_evicting_os_wait(spa_t *spa) { mutex_enter(&spa->spa_evicting_os_lock); while (!list_is_empty(&spa->spa_evicting_os_list)) cv_wait(&spa->spa_evicting_os_cv, &spa->spa_evicting_os_lock); mutex_exit(&spa->spa_evicting_os_lock); dmu_buf_user_evict_wait(); } int spa_max_replication(spa_t *spa) { /* * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to * handle BPs with more than one DVA allocated. Set our max * replication level accordingly. 
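 *
 * For example, with the default spa_max_replication_override (initialized
 * to SPA_DVAS_PER_BP earlier in this file), a pool at or beyond
 * SPA_VERSION_DITTO_BLOCKS gets MIN(SPA_DVAS_PER_BP,
 * spa_max_replication_override) == SPA_DVAS_PER_BP, while an older pool
 * is limited to a single DVA.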
*/ if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS) return (1); return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override)); } int spa_prev_software_version(spa_t *spa) { return (spa->spa_prev_software_version); } uint64_t spa_deadman_synctime(spa_t *spa) { return (spa->spa_deadman_synctime); } uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva) { uint64_t asize = DVA_GET_ASIZE(dva); uint64_t dsize = asize; ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); if (asize != 0 && spa->spa_deflate) { uint64_t vdev = DVA_GET_VDEV(dva); vdev_t *vd = vdev_lookup_top(spa, vdev); if (vd == NULL) { panic( "dva_get_dsize_sync(): bad DVA %llu:%llu", (u_longlong_t)vdev, (u_longlong_t)asize); } dsize = (asize >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio; } return (dsize); } uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp) { uint64_t dsize = 0; for (int d = 0; d < BP_GET_NDVAS(bp); d++) dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); return (dsize); } uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp) { uint64_t dsize = 0; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); for (int d = 0; d < BP_GET_NDVAS(bp); d++) dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); spa_config_exit(spa, SCL_VDEV, FTAG); return (dsize); } uint64_t spa_dirty_data(spa_t *spa) { return (spa->spa_dsl_pool->dp_dirty_total); } /* * ========================================================================== * Initialization and Termination * ========================================================================== */ static int spa_name_compare(const void *a1, const void *a2) { const spa_t *s1 = a1; const spa_t *s2 = a2; int s; s = strcmp(s1->spa_name, s2->spa_name); return (AVL_ISIGN(s)); } int spa_busy(void) { return (spa_active_count); } void spa_boot_init() { spa_config_load(); } #ifdef _KERNEL EVENTHANDLER_DEFINE(mountroot, spa_boot_init, NULL, 0); #endif void spa_init(int mode) { mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL); avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t), offsetof(spa_t, spa_avl)); avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_aux_t), offsetof(spa_aux_t, aux_avl)); avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t), offsetof(spa_aux_t, aux_avl)); spa_mode_global = mode; #ifdef illumos #ifdef _KERNEL spa_arch_init(); #else if (spa_mode_global != FREAD && dprintf_find_string("watch")) { arc_procfd = open("/proc/self/ctl", O_WRONLY); if (arc_procfd == -1) { perror("could not enable watchpoints: " "opening /proc/self/ctl failed: "); } else { arc_watch = B_TRUE; } } #endif #endif /* illumos */ refcount_sysinit(); unique_init(); range_tree_init(); metaslab_alloc_trace_init(); zio_init(); lz4_init(); dmu_init(); zil_init(); vdev_cache_stat_init(); vdev_file_init(); zfs_prop_init(); zpool_prop_init(); zpool_feature_init(); spa_config_load(); l2arc_start(); scan_init(); dsl_scan_global_init(); #ifndef illumos #ifdef _KERNEL zfs_deadman_init(); #endif #endif /* !illumos */ } void spa_fini(void) { l2arc_stop(); spa_evict_all(); vdev_file_fini(); vdev_cache_stat_fini(); zil_fini(); dmu_fini(); lz4_fini(); zio_fini(); metaslab_alloc_trace_fini(); range_tree_fini(); unique_fini(); refcount_fini(); scan_fini(); avl_destroy(&spa_namespace_avl); avl_destroy(&spa_spare_avl); avl_destroy(&spa_l2cache_avl); cv_destroy(&spa_namespace_cv); mutex_destroy(&spa_namespace_lock); 
mutex_destroy(&spa_spare_lock); mutex_destroy(&spa_l2cache_lock); } /* * Return whether this pool has slogs. No locking needed. * It's not a problem if the wrong answer is returned as it's only for * performance and not correctness */ boolean_t spa_has_slogs(spa_t *spa) { return (spa->spa_log_class->mc_rotor != NULL); } spa_log_state_t spa_get_log_state(spa_t *spa) { return (spa->spa_log_state); } void spa_set_log_state(spa_t *spa, spa_log_state_t state) { spa->spa_log_state = state; } boolean_t spa_is_root(spa_t *spa) { return (spa->spa_is_root); } boolean_t spa_writeable(spa_t *spa) { return (!!(spa->spa_mode & FWRITE) && spa->spa_trust_config); } /* * Returns true if there is a pending sync task in any of the current * syncing txg, the current quiescing txg, or the current open txg. */ boolean_t spa_has_pending_synctask(spa_t *spa) { return (!txg_all_lists_empty(&spa->spa_dsl_pool->dp_sync_tasks) || !txg_all_lists_empty(&spa->spa_dsl_pool->dp_early_sync_tasks)); } int spa_mode(spa_t *spa) { return (spa->spa_mode); } uint64_t spa_bootfs(spa_t *spa) { return (spa->spa_bootfs); } uint64_t spa_delegation(spa_t *spa) { return (spa->spa_delegation); } objset_t * spa_meta_objset(spa_t *spa) { return (spa->spa_meta_objset); } enum zio_checksum spa_dedup_checksum(spa_t *spa) { return (spa->spa_dedup_checksum); } /* * Reset pool scan stat per scan pass (or reboot). */ void spa_scan_stat_init(spa_t *spa) { /* data not stored on disk */ spa->spa_scan_pass_start = gethrestime_sec(); if (dsl_scan_is_paused_scrub(spa->spa_dsl_pool->dp_scan)) spa->spa_scan_pass_scrub_pause = spa->spa_scan_pass_start; else spa->spa_scan_pass_scrub_pause = 0; spa->spa_scan_pass_scrub_spent_paused = 0; spa->spa_scan_pass_exam = 0; spa->spa_scan_pass_issued = 0; vdev_scan_stat_init(spa->spa_root_vdev); } /* * Get scan stats for zpool status reports */ int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) { dsl_scan_t *scn = spa->spa_dsl_pool ? spa->spa_dsl_pool->dp_scan : NULL; if (scn == NULL || scn->scn_phys.scn_func == POOL_SCAN_NONE) return (SET_ERROR(ENOENT)); bzero(ps, sizeof (pool_scan_stat_t)); /* data stored on disk */ ps->pss_func = scn->scn_phys.scn_func; ps->pss_state = scn->scn_phys.scn_state; ps->pss_start_time = scn->scn_phys.scn_start_time; ps->pss_end_time = scn->scn_phys.scn_end_time; ps->pss_to_examine = scn->scn_phys.scn_to_examine; ps->pss_to_process = scn->scn_phys.scn_to_process; ps->pss_processed = scn->scn_phys.scn_processed; ps->pss_errors = scn->scn_phys.scn_errors; ps->pss_examined = scn->scn_phys.scn_examined; ps->pss_issued = scn->scn_issued_before_pass + spa->spa_scan_pass_issued; /* data not stored on disk */ ps->pss_pass_start = spa->spa_scan_pass_start; ps->pss_pass_exam = spa->spa_scan_pass_exam; ps->pss_pass_issued = spa->spa_scan_pass_issued; ps->pss_pass_scrub_pause = spa->spa_scan_pass_scrub_pause; ps->pss_pass_scrub_spent_paused = spa->spa_scan_pass_scrub_spent_paused; return (0); } int spa_maxblocksize(spa_t *spa) { if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) return (SPA_MAXBLOCKSIZE); else return (SPA_OLD_MAXBLOCKSIZE); } /* * Returns the txg that the last device removal completed. No indirect mappings * have been added since this txg. */ uint64_t spa_get_last_removal_txg(spa_t *spa) { uint64_t vdevid; uint64_t ret = -1ULL; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); /* * sr_prev_indirect_vdev is only modified while holding all the * config locks, so it is sufficient to hold SCL_VDEV as reader when * examining it. 
*/ vdevid = spa->spa_removing_phys.sr_prev_indirect_vdev; while (vdevid != -1ULL) { vdev_t *vd = vdev_lookup_top(spa, vdevid); vdev_indirect_births_t *vib = vd->vdev_indirect_births; ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops); /* * If the removal did not remap any data, we don't care. */ if (vdev_indirect_births_count(vib) != 0) { ret = vdev_indirect_births_last_entry_txg(vib); break; } vdevid = vd->vdev_indirect_config.vic_prev_indirect_vdev; } spa_config_exit(spa, SCL_VDEV, FTAG); IMPLY(ret != -1ULL, spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REMOVAL)); return (ret); } boolean_t spa_trust_config(spa_t *spa) { return (spa->spa_trust_config); } uint64_t spa_missing_tvds_allowed(spa_t *spa) { return (spa->spa_missing_tvds_allowed); } void spa_set_missing_tvds(spa_t *spa, uint64_t missing) { spa->spa_missing_tvds = missing; } boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; for (uint64_t c = 0; c < rvd->vdev_children; c++) { if (!vdev_is_spacemap_addressable(rvd->vdev_child[c])) return (B_FALSE); } return (B_TRUE); } boolean_t spa_has_checkpoint(spa_t *spa) { return (spa->spa_checkpoint_txg != 0); } boolean_t spa_importing_readonly_checkpoint(spa_t *spa) { return ((spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT) && spa->spa_mode == FREAD); } uint64_t spa_min_claim_txg(spa_t *spa) { uint64_t checkpoint_txg = spa->spa_uberblock.ub_checkpoint_txg; if (checkpoint_txg != 0) return (checkpoint_txg + 1); return (spa->spa_first_txg); } /* * If there is a checkpoint, async destroys may consume more space from * the pool instead of freeing it. In an attempt to save the pool from * getting suspended when it is about to run out of space, we stop * processing async destroys. */ boolean_t spa_suspend_async_destroy(spa_t *spa) { dsl_pool_t *dp = spa_get_dsl(spa); uint64_t unreserved = dsl_pool_unreserved_space(dp, ZFS_SPACE_CHECK_EXTRA_RESERVED); uint64_t used = dsl_dir_phys(dp->dp_root_dir)->dd_used_bytes; uint64_t avail = (unreserved > used) ? (unreserved - used) : 0; if (spa_has_checkpoint(spa) && avail == 0) return (B_TRUE); return (B_FALSE); } Index: projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h =================================================================== --- projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h (revision 337615) +++ projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h (revision 337616) @@ -1,421 +1,423 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2013 Martin Matuska . 
All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2017 Datto Inc. */ #ifndef _SYS_SPA_IMPL_H #define _SYS_SPA_IMPL_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif typedef struct spa_error_entry { zbookmark_phys_t se_bookmark; char *se_name; avl_node_t se_avl; } spa_error_entry_t; typedef struct spa_history_phys { uint64_t sh_pool_create_len; /* ending offset of zpool create */ uint64_t sh_phys_max_off; /* physical EOF */ uint64_t sh_bof; /* logical BOF */ uint64_t sh_eof; /* logical EOF */ uint64_t sh_records_lost; /* num of records overwritten */ } spa_history_phys_t; /* * All members must be uint64_t, for byteswap purposes. */ typedef struct spa_removing_phys { uint64_t sr_state; /* dsl_scan_state_t */ /* * The vdev ID that we most recently attempted to remove, * or -1 if no removal has been attempted. */ uint64_t sr_removing_vdev; /* * The vdev ID that we most recently successfully removed, * or -1 if no devices have been removed. */ uint64_t sr_prev_indirect_vdev; uint64_t sr_start_time; uint64_t sr_end_time; /* * Note that we can not use the space map's or indirect mapping's * accounting as a substitute for these values, because we need to * count frees of not-yet-copied data as though it did the copy. * Otherwise, we could get into a situation where copied > to_copy, * or we complete before copied == to_copy. */ uint64_t sr_to_copy; /* bytes that need to be copied */ uint64_t sr_copied; /* bytes that have been copied or freed */ } spa_removing_phys_t; /* * This struct is stored as an entry in the DMU_POOL_DIRECTORY_OBJECT * (with key DMU_POOL_CONDENSING_INDIRECT). It is present if a condense * of an indirect vdev's mapping object is in progress. */ typedef struct spa_condensing_indirect_phys { /* * The vdev ID of the indirect vdev whose indirect mapping is * being condensed. */ uint64_t scip_vdev; /* * The vdev's old obsolete spacemap. This spacemap's contents are * being integrated into the new mapping. */ uint64_t scip_prev_obsolete_sm_object; /* * The new mapping object that is being created. */ uint64_t scip_next_mapping_object; } spa_condensing_indirect_phys_t; struct spa_aux_vdev { uint64_t sav_object; /* MOS object for device list */ nvlist_t *sav_config; /* cached device config */ vdev_t **sav_vdevs; /* devices */ int sav_count; /* number devices */ boolean_t sav_sync; /* sync the device list */ nvlist_t **sav_pending; /* pending device additions */ uint_t sav_npending; /* # pending devices */ }; typedef struct spa_config_lock { kmutex_t scl_lock; kthread_t *scl_writer; int scl_write_wanted; kcondvar_t scl_cv; refcount_t scl_count; } spa_config_lock_t; typedef struct spa_config_dirent { list_node_t scd_link; char *scd_path; } spa_config_dirent_t; typedef enum zio_taskq_type { ZIO_TASKQ_ISSUE = 0, ZIO_TASKQ_ISSUE_HIGH, ZIO_TASKQ_INTERRUPT, ZIO_TASKQ_INTERRUPT_HIGH, ZIO_TASKQ_TYPES } zio_taskq_type_t; /* * State machine for the zpool-poolname process. 
The states transitions * are done as follows: * * From To Routine * PROC_NONE -> PROC_CREATED spa_activate() * PROC_CREATED -> PROC_ACTIVE spa_thread() * PROC_ACTIVE -> PROC_DEACTIVATE spa_deactivate() * PROC_DEACTIVATE -> PROC_GONE spa_thread() * PROC_GONE -> PROC_NONE spa_deactivate() */ typedef enum spa_proc_state { SPA_PROC_NONE, /* spa_proc = &p0, no process created */ SPA_PROC_CREATED, /* spa_activate() has proc, is waiting */ SPA_PROC_ACTIVE, /* taskqs created, spa_proc set */ SPA_PROC_DEACTIVATE, /* spa_deactivate() requests process exit */ SPA_PROC_GONE /* spa_thread() is exiting, spa_proc = &p0 */ } spa_proc_state_t; typedef struct spa_taskqs { uint_t stqs_count; taskq_t **stqs_taskq; } spa_taskqs_t; typedef enum spa_all_vdev_zap_action { AVZ_ACTION_NONE = 0, AVZ_ACTION_DESTROY, /* Destroy all per-vdev ZAPs and the AVZ. */ AVZ_ACTION_REBUILD, /* Populate the new AVZ, see spa_avz_rebuild */ AVZ_ACTION_INITIALIZE } spa_avz_action_t; typedef enum spa_config_source { SPA_CONFIG_SRC_NONE = 0, SPA_CONFIG_SRC_SCAN, /* scan of path (default: /dev/dsk) */ SPA_CONFIG_SRC_CACHEFILE, /* any cachefile */ SPA_CONFIG_SRC_TRYIMPORT, /* returned from call to tryimport */ SPA_CONFIG_SRC_SPLIT, /* new pool in a pool split */ SPA_CONFIG_SRC_MOS /* MOS, but not always from right txg */ } spa_config_source_t; struct spa { /* * Fields protected by spa_namespace_lock. */ char spa_name[ZFS_MAX_DATASET_NAME_LEN]; /* pool name */ char *spa_comment; /* comment */ avl_node_t spa_avl; /* node in spa_namespace_avl */ nvlist_t *spa_config; /* last synced config */ nvlist_t *spa_config_syncing; /* currently syncing config */ nvlist_t *spa_config_splitting; /* config for splitting */ nvlist_t *spa_load_info; /* info and errors from load */ uint64_t spa_config_txg; /* txg of last config change */ int spa_sync_pass; /* iterate-to-convergence */ pool_state_t spa_state; /* pool state */ int spa_inject_ref; /* injection references */ uint8_t spa_sync_on; /* sync threads are running */ spa_load_state_t spa_load_state; /* current load operation */ boolean_t spa_indirect_vdevs_loaded; /* mappings loaded? */ boolean_t spa_trust_config; /* do we trust vdev tree? */ spa_config_source_t spa_config_source; /* where config comes from? */ uint64_t spa_import_flags; /* import specific flags */ spa_taskqs_t spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES]; dsl_pool_t *spa_dsl_pool; boolean_t spa_is_initializing; /* true while opening pool */ metaslab_class_t *spa_normal_class; /* normal data class */ metaslab_class_t *spa_log_class; /* intent log data class */ uint64_t spa_first_txg; /* first txg after spa_open() */ uint64_t spa_final_txg; /* txg of export/destroy */ uint64_t spa_freeze_txg; /* freeze pool at this txg */ uint64_t spa_load_max_txg; /* best initial ub_txg */ uint64_t spa_claim_max_txg; /* highest claimed birth txg */ timespec_t spa_loaded_ts; /* 1st successful open time */ objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */ kmutex_t spa_evicting_os_lock; /* Evicting objset list lock */ list_t spa_evicting_os_list; /* Objsets being evicted. 
*/ kcondvar_t spa_evicting_os_cv; /* Objset Eviction Completion */ txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */ vdev_t *spa_root_vdev; /* top-level vdev container */ int spa_min_ashift; /* of vdevs in normal class */ int spa_max_ashift; /* of vdevs in normal class */ uint64_t spa_config_guid; /* config pool guid */ uint64_t spa_load_guid; /* spa_load initialized guid */ uint64_t spa_last_synced_guid; /* last synced guid */ list_t spa_config_dirty_list; /* vdevs with dirty config */ list_t spa_state_dirty_list; /* vdevs with dirty state */ /* * spa_alloc_locks and spa_alloc_trees are arrays, whose lengths are * stored in spa_alloc_count. There is one tree and one lock for each * allocator, to help improve allocation performance in write-heavy * workloads. */ kmutex_t *spa_alloc_locks; avl_tree_t *spa_alloc_trees; int spa_alloc_count; spa_aux_vdev_t spa_spares; /* hot spares */ spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */ nvlist_t *spa_label_features; /* Features for reading MOS */ uint64_t spa_config_object; /* MOS object for pool config */ uint64_t spa_config_generation; /* config generation number */ uint64_t spa_syncing_txg; /* txg currently syncing */ bpobj_t spa_deferred_bpobj; /* deferred-free bplist */ bplist_t spa_free_bplist[TXG_SIZE]; /* bplist of stuff to free */ zio_cksum_salt_t spa_cksum_salt; /* secret salt for cksum */ /* checksum context templates */ kmutex_t spa_cksum_tmpls_lock; void *spa_cksum_tmpls[ZIO_CHECKSUM_FUNCTIONS]; uberblock_t spa_ubsync; /* last synced uberblock */ uberblock_t spa_uberblock; /* current uberblock */ boolean_t spa_extreme_rewind; /* rewind past deferred frees */ uint64_t spa_last_io; /* lbolt of last non-scan I/O */ kmutex_t spa_scrub_lock; /* resilver/scrub lock */ uint64_t spa_scrub_inflight; /* in-flight scrub bytes */ uint64_t spa_load_verify_ios; /* in-flight verifications IOs */ kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */ uint8_t spa_scrub_active; /* active or suspended? */ uint8_t spa_scrub_type; /* type of scrub we're doing */ uint8_t spa_scrub_finished; /* indicator to rotate logs */ uint8_t spa_scrub_started; /* started since last boot */ uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */ uint64_t spa_scan_pass_start; /* start time per pass/reboot */ uint64_t spa_scan_pass_scrub_pause; /* scrub pause time */ uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */ uint64_t spa_scan_pass_exam; /* examined bytes per pass */ uint64_t spa_scan_pass_issued; /* issued bytes per pass */ kmutex_t spa_async_lock; /* protect async state */ kthread_t *spa_async_thread; /* thread doing async task */ kthread_t *spa_async_thread_vd; /* thread doing vd async task */ int spa_async_suspended; /* async tasks suspended */ kcondvar_t spa_async_cv; /* wait for thread_exit() */ uint16_t spa_async_tasks; /* async task mask */ uint64_t spa_missing_tvds; /* unopenable tvds on load */ uint64_t spa_missing_tvds_allowed; /* allow loading spa? */ spa_removing_phys_t spa_removing_phys; spa_vdev_removal_t *spa_vdev_removal; spa_condensing_indirect_phys_t spa_condensing_indirect_phys; spa_condensing_indirect_t *spa_condensing_indirect; zthr_t *spa_condense_zthr; /* zthr doing condense. 
*/ uint64_t spa_checkpoint_txg; /* the txg of the checkpoint */ spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */ zthr_t *spa_checkpoint_discard_zthr; char *spa_root; /* alternate root directory */ uint64_t spa_ena; /* spa-wide ereport ENA */ int spa_last_open_failed; /* error if last open failed */ uint64_t spa_last_ubsync_txg; /* "best" uberblock txg */ uint64_t spa_last_ubsync_txg_ts; /* timestamp from that ub */ uint64_t spa_load_txg; /* ub txg that loaded */ uint64_t spa_load_txg_ts; /* timestamp from that ub */ uint64_t spa_load_meta_errors; /* verify metadata err count */ uint64_t spa_load_data_errors; /* verify data err count */ uint64_t spa_verify_min_txg; /* start txg of verify scrub */ kmutex_t spa_errlog_lock; /* error log lock */ uint64_t spa_errlog_last; /* last error log object */ uint64_t spa_errlog_scrub; /* scrub error log object */ kmutex_t spa_errlist_lock; /* error list/ereport lock */ avl_tree_t spa_errlist_last; /* last error list */ avl_tree_t spa_errlist_scrub; /* scrub error list */ uint64_t spa_deflate; /* should we deflate? */ uint64_t spa_history; /* history object */ kmutex_t spa_history_lock; /* history lock */ vdev_t *spa_pending_vdev; /* pending vdev additions */ kmutex_t spa_props_lock; /* property lock */ uint64_t spa_pool_props_object; /* object for properties */ uint64_t spa_bootfs; /* default boot filesystem */ uint64_t spa_failmode; /* failure mode for the pool */ uint64_t spa_delegation; /* delegation on/off */ list_t spa_config_list; /* previous cache file(s) */ /* per-CPU array of root of async I/O: */ zio_t **spa_async_zio_root; zio_t *spa_suspend_zio_root; /* root of all suspended I/O */ zio_t *spa_txg_zio[TXG_SIZE]; /* spa_sync() waits for this */ kmutex_t spa_suspend_lock; /* protects suspend_zio_root */ kcondvar_t spa_suspend_cv; /* notification of resume */ uint8_t spa_suspended; /* pool is suspended */ uint8_t spa_claiming; /* pool is doing zil_claim() */ boolean_t spa_is_root; /* pool is root */ int spa_minref; /* num refs when first opened */ int spa_mode; /* FREAD | FWRITE */ spa_log_state_t spa_log_state; /* log state */ uint64_t spa_autoexpand; /* lun expansion on/off */ uint64_t spa_bootsize; /* efi system partition size */ ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */ uint64_t spa_ddt_stat_object; /* DDT statistics */ uint64_t spa_dedup_ditto; /* dedup ditto threshold */ uint64_t spa_dedup_checksum; /* default dedup checksum */ uint64_t spa_dspace; /* dspace in normal class */ kmutex_t spa_vdev_top_lock; /* dueling offline/remove */ kmutex_t spa_proc_lock; /* protects spa_proc* */ kcondvar_t spa_proc_cv; /* spa_proc_state transitions */ spa_proc_state_t spa_proc_state; /* see definition */ struct proc *spa_proc; /* "zpool-poolname" process */ uint64_t spa_did; /* if procp != p0, did of t1 */ kthread_t *spa_trim_thread; /* thread sending TRIM I/Os */ kmutex_t spa_trim_lock; /* protects spa_trim_cv */ kcondvar_t spa_trim_cv; /* used to notify TRIM thread */ boolean_t spa_autoreplace; /* autoreplace set in open */ int spa_vdev_locks; /* locks grabbed */ uint64_t spa_creation_version; /* version at pool creation */ uint64_t spa_prev_software_version; /* See ub_software_version */ uint64_t spa_feat_for_write_obj; /* required to write to pool */ uint64_t spa_feat_for_read_obj; /* required to read from pool */ uint64_t spa_feat_desc_obj; /* Feature descriptions */ uint64_t spa_feat_enabled_txg_obj; /* Feature enabled txg */ + kmutex_t spa_feat_stats_lock; /* protects spa_feat_stats */ + nvlist_t 
*spa_feat_stats; /* Cache of enabled features */ /* cache feature refcounts */ uint64_t spa_feat_refcount_cache[SPA_FEATURES]; #ifdef illumos cyclic_id_t spa_deadman_cycid; /* cyclic id */ #else /* !illumos */ #ifdef _KERNEL struct callout spa_deadman_cycid; /* callout id */ struct task spa_deadman_task; #endif #endif /* illumos */ uint64_t spa_deadman_calls; /* number of deadman calls */ hrtime_t spa_sync_starttime; /* starting time fo spa_sync */ uint64_t spa_deadman_synctime; /* deadman expiration timer */ uint64_t spa_all_vdev_zaps; /* ZAP of per-vd ZAP obj #s */ spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */ #ifdef illumos /* * spa_iokstat_lock protects spa_iokstat and * spa_queue_stats[]. */ kmutex_t spa_iokstat_lock; struct kstat *spa_iokstat; /* kstat of io to this pool */ struct { int spa_active; int spa_queued; } spa_queue_stats[ZIO_PRIORITY_NUM_QUEUEABLE]; #endif /* arc_memory_throttle() parameters during low memory condition */ uint64_t spa_lowmem_page_load; /* memory load during txg */ uint64_t spa_lowmem_last_txg; /* txg window start */ hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ /* * spa_refcount & spa_config_lock must be the last elements * because refcount_t changes size based on compilation options. * In order for the MDB module to function correctly, the other * fields must remain in the same location. */ spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */ refcount_t spa_refcount; /* number of opens */ #ifndef illumos boolean_t spa_splitting_newspa; /* creating new spa in split */ #endif }; extern const char *spa_config_path; extern void spa_taskq_dispatch_ent(spa_t *spa, zio_type_t t, zio_taskq_type_t q, task_func_t *func, void *arg, uint_t flags, taskq_ent_t *ent); extern void spa_load_spares(spa_t *spa); extern void spa_load_l2cache(spa_t *spa); #ifdef __cplusplus } #endif #endif /* _SYS_SPA_IMPL_H */ Index: projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c =================================================================== --- projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c (revision 337615) +++ projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c (revision 337616) @@ -1,509 +1,509 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2011, 2015 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include "zfeature_common.h" #include /* * ZFS Feature Flags * ----------------- * * ZFS feature flags are used to provide fine-grained versioning to the ZFS * on-disk format. Once enabled on a pool feature flags replace the old * spa_version() number. 
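 *
 * As a minimal usage sketch of the interfaces described in detail below
 * (assuming a syncing-context transaction "tx" is already in hand), a
 * feature consumer typically does something like:
 *
 *	if (spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY) &&
 *	    !spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY))
 *		spa_feature_incr(spa, SPA_FEATURE_ASYNC_DESTROY, tx);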
* * Each new on-disk format change will be given a uniquely identifying string * guid rather than a version number. This avoids the problem of different * organizations creating new on-disk formats with the same version number. To * keep feature guids unique they should consist of the reverse dns name of the * organization which implemented the feature and a short name for the feature, * separated by a colon (e.g. com.delphix:async_destroy). * * Reference Counts * ---------------- * * Within each pool features can be in one of three states: disabled, enabled, * or active. These states are differentiated by a reference count stored on * disk for each feature: * * 1) If there is no reference count stored on disk the feature is disabled. * 2) If the reference count is 0 a system administrator has enabled the * feature, but the feature has not been used yet, so no on-disk * format changes have been made. * 3) If the reference count is greater than 0 the feature is active. * The format changes required by the feature are currently on disk. * Note that if the feature's format changes are reversed the feature * may choose to set its reference count back to 0. * * Feature flags makes no differentiation between non-zero reference counts * for an active feature (e.g. a reference count of 1 means the same thing as a * reference count of 27834721), but feature implementations may choose to use * the reference count to store meaningful information. For example, a new RAID * implementation might set the reference count to the number of vdevs using * it. If all those disks are removed from the pool the feature goes back to * having a reference count of 0. * * It is the responsibility of the individual features to maintain a non-zero * reference count as long as the feature's format changes are present on disk. * * Dependencies * ------------ * * Each feature may depend on other features. The only effect of this * relationship is that when a feature is enabled all of its dependencies are * automatically enabled as well. Any future work to support disabling of * features would need to ensure that features cannot be disabled if other * enabled features depend on them. * * On-disk Format * -------------- * * When feature flags are enabled spa_version() is set to SPA_VERSION_FEATURES * (5000). In order for this to work the pool is automatically upgraded to * SPA_VERSION_BEFORE_FEATURES (28) first, so all pre-feature flags on disk * format changes will be in use. * * Information about features is stored in 3 ZAP objects in the pool's MOS. * These objects are linked to by the following names in the pool directory * object: * * 1) features_for_read: feature guid -> reference count * Features needed to open the pool for reading. * 2) features_for_write: feature guid -> reference count * Features needed to open the pool for writing. * 3) feature_descriptions: feature guid -> descriptive string * A human readable string. * * All enabled features appear in either features_for_read or * features_for_write, but not both. * * To open a pool in read-only mode only the features listed in * features_for_read need to be supported. * * To open the pool in read-write mode features in both features_for_read and * features_for_write need to be supported. * * Some features may be required to read the ZAP objects containing feature * information. 
 * To allow software to check for compatibility with these features
 * before the pool is opened their names must be stored in the label in a
 * new "features_for_read" entry (note that features that are only required
 * to write to a pool never need to be stored in the label since the
 * features_for_write ZAP object can be read before the pool is written to).
 * To save space in the label features must be explicitly marked as needing to
 * be written to the label. Also, reference counts are not stored in the label,
 * instead any feature whose reference count drops to 0 is removed from the
 * label.
 *
 * Adding New Features
 * -------------------
 *
 * Features must be registered in the zpool_feature_init() function in
 * zfeature_common.c using the zfeature_register() function. This function
 * has arguments to specify if the feature should be stored in the
 * features_for_read or features_for_write ZAP object and if it needs to be
 * written to the label when active.
 *
 * Once a feature is registered it will appear as a "feature@"
 * property which can be set by an administrator. Feature implementors should
 * use the spa_feature_is_enabled() and spa_feature_is_active() functions to
 * query the state of a feature and the spa_feature_incr() and
 * spa_feature_decr() functions to change an enabled feature's reference count.
 * Reference counts may only be updated in the syncing context.
 *
 * Features may not perform enable-time initialization. Instead, any such
 * initialization should occur when the feature is first used. This design
 * enforces that on-disk changes be made only when features are used. Code
 * should only check if a feature is enabled using spa_feature_is_enabled(),
 * not by relying on any feature-specific metadata existing. If a feature is
 * enabled, but the feature's metadata is not on disk yet, then it should be
 * created as needed.
 *
 * As an example, consider the com.delphix:async_destroy feature. This feature
 * relies on the existence of a bptree in the MOS that stores blocks for
 * asynchronous freeing. This bptree is not created when async_destroy is
 * enabled. Instead, when a dataset is destroyed spa_feature_is_enabled() is
 * called to check if async_destroy is enabled. If it is and the bptree object
 * does not exist yet, the bptree object is created as part of the dataset
 * destroy and async_destroy's reference count is incremented to indicate it
 * has made an on-disk format change. Later, after the destroyed dataset's
 * blocks have all been asynchronously freed there is no longer any use for the
 * bptree object, so it is destroyed and async_destroy's reference count is
 * decremented back to 0 to indicate that it has undone its on-disk format
 * changes.
 */

typedef enum {
	FEATURE_ACTION_INCR,
	FEATURE_ACTION_DECR,
} feature_action_t;

/*
 * Checks that the active features in the pool are supported by
 * this software. Adds each unsupported feature (name -> description) to
 * the supplied nvlist.
 */
boolean_t
spa_features_check(spa_t *spa, boolean_t for_write,
    nvlist_t *unsup_feat, nvlist_t *enabled_feat)
{
	objset_t *os = spa->spa_meta_objset;
	boolean_t supported;
	zap_cursor_t zc;
	zap_attribute_t za;
	uint64_t obj = for_write ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj; supported = B_TRUE; for (zap_cursor_init(&zc, os, obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { ASSERT(za.za_integer_length == sizeof (uint64_t) && za.za_num_integers == 1); if (NULL != enabled_feat) { fnvlist_add_uint64(enabled_feat, za.za_name, za.za_first_integer); } if (za.za_first_integer != 0 && !zfeature_is_supported(za.za_name)) { supported = B_FALSE; if (NULL != unsup_feat) { char *desc = ""; char buf[MAXPATHLEN]; if (zap_lookup(os, spa->spa_feat_desc_obj, za.za_name, 1, sizeof (buf), buf) == 0) desc = buf; VERIFY(nvlist_add_string(unsup_feat, za.za_name, desc) == 0); } } } zap_cursor_fini(&zc); return (supported); } /* * Use an in-memory cache of feature refcounts for quick retrieval. * * Note: well-designed features will not need to use this; they should * use spa_feature_is_enabled() and spa_feature_is_active() instead. - * However, this is non-static for zdb and zhack. + * However, this is non-static for zdb, zhack, and spa_add_feature_stats(). */ int feature_get_refcount(spa_t *spa, zfeature_info_t *feature, uint64_t *res) { ASSERT(VALID_FEATURE_FID(feature->fi_feature)); if (spa->spa_feat_refcount_cache[feature->fi_feature] == SPA_FEATURE_DISABLED) { return (SET_ERROR(ENOTSUP)); } *res = spa->spa_feat_refcount_cache[feature->fi_feature]; return (0); } /* * Note: well-designed features will not need to use this; they should * use spa_feature_is_enabled() and spa_feature_is_active() instead. * However, this is non-static for zdb and zhack. */ int feature_get_refcount_from_disk(spa_t *spa, zfeature_info_t *feature, uint64_t *res) { int err; uint64_t refcount; uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ? spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj; /* * If the pool is currently being created, the feature objects may not * have been allocated yet. Act as though all features are disabled. */ if (zapobj == 0) return (SET_ERROR(ENOTSUP)); err = zap_lookup(spa->spa_meta_objset, zapobj, feature->fi_guid, sizeof (uint64_t), 1, &refcount); if (err != 0) { if (err == ENOENT) return (SET_ERROR(ENOTSUP)); else return (err); } *res = refcount; return (0); } static int feature_get_enabled_txg(spa_t *spa, zfeature_info_t *feature, uint64_t *res) { uint64_t enabled_txg_obj = spa->spa_feat_enabled_txg_obj; ASSERT(zfeature_depends_on(feature->fi_feature, SPA_FEATURE_ENABLED_TXG)); if (!spa_feature_is_enabled(spa, feature->fi_feature)) { return (SET_ERROR(ENOTSUP)); } ASSERT(enabled_txg_obj != 0); VERIFY0(zap_lookup(spa->spa_meta_objset, spa->spa_feat_enabled_txg_obj, feature->fi_guid, sizeof (uint64_t), 1, res)); return (0); } /* * This function is non-static for zhack; it should otherwise not be used * outside this file. */ void feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount, dmu_tx_t *tx) { ASSERT(VALID_FEATURE_OR_NONE(feature->fi_feature)); uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ? spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj; VERIFY0(zap_update(spa->spa_meta_objset, zapobj, feature->fi_guid, sizeof (uint64_t), 1, &refcount, tx)); /* * feature_sync is called directly from zhack, allowing the * creation of arbitrary features whose fi_feature field may * be greater than SPA_FEATURES. When called from zhack, the * zfeature_info_t object's fi_feature field will be set to * SPA_FEATURE_NONE. 
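 * For in-core consistency the refcount cache below is updated with
 * atomic_swap_64() where that primitive is available; the VERIFY on the
 * swap's return value asserts that no concurrent updater raced this sync.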
*/ if (feature->fi_feature != SPA_FEATURE_NONE) { uint64_t *refcount_cache = &spa->spa_feat_refcount_cache[feature->fi_feature]; #ifdef atomic_swap_64 VERIFY3U(*refcount_cache, ==, atomic_swap_64(refcount_cache, refcount)); #else *refcount_cache = refcount; #endif } if (refcount == 0) spa_deactivate_mos_feature(spa, feature->fi_guid); else if (feature->fi_flags & ZFEATURE_FLAG_MOS) spa_activate_mos_feature(spa, feature->fi_guid, tx); } /* * This function is non-static for zhack; it should otherwise not be used * outside this file. */ void feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx) { uint64_t initial_refcount = (feature->fi_flags & ZFEATURE_FLAG_ACTIVATE_ON_ENABLE) ? 1 : 0; uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ? spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj; ASSERT(0 != zapobj); ASSERT(zfeature_is_valid_guid(feature->fi_guid)); ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES); /* * If the feature is already enabled, ignore the request. */ if (zap_contains(spa->spa_meta_objset, zapobj, feature->fi_guid) == 0) return; for (int i = 0; feature->fi_depends[i] != SPA_FEATURE_NONE; i++) spa_feature_enable(spa, feature->fi_depends[i], tx); VERIFY0(zap_update(spa->spa_meta_objset, spa->spa_feat_desc_obj, feature->fi_guid, 1, strlen(feature->fi_desc) + 1, feature->fi_desc, tx)); feature_sync(spa, feature, initial_refcount, tx); if (spa_feature_is_enabled(spa, SPA_FEATURE_ENABLED_TXG)) { uint64_t enabling_txg = dmu_tx_get_txg(tx); if (spa->spa_feat_enabled_txg_obj == 0ULL) { spa->spa_feat_enabled_txg_obj = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FEATURE_ENABLED_TXG, tx); } spa_feature_incr(spa, SPA_FEATURE_ENABLED_TXG, tx); VERIFY0(zap_add(spa->spa_meta_objset, spa->spa_feat_enabled_txg_obj, feature->fi_guid, sizeof (uint64_t), 1, &enabling_txg, tx)); } } static void feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action, dmu_tx_t *tx) { uint64_t refcount; zfeature_info_t *feature = &spa_feature_table[fid]; uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ? spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj; ASSERT(VALID_FEATURE_FID(fid)); ASSERT(0 != zapobj); ASSERT(zfeature_is_valid_guid(feature->fi_guid)); ASSERT(dmu_tx_is_syncing(tx)); ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES); VERIFY3U(feature_get_refcount(spa, feature, &refcount), !=, ENOTSUP); switch (action) { case FEATURE_ACTION_INCR: VERIFY3U(refcount, !=, UINT64_MAX); refcount++; break; case FEATURE_ACTION_DECR: VERIFY3U(refcount, !=, 0); refcount--; break; default: ASSERT(0); break; } feature_sync(spa, feature, refcount, tx); } void spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx) { /* * We create feature flags ZAP objects in two instances: during pool * creation and during pool upgrade. */ ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on && tx->tx_txg == TXG_INITIAL)); spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FEATURES_FOR_READ, tx); spa->spa_feat_for_write_obj = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FEATURES_FOR_WRITE, tx); spa->spa_feat_desc_obj = zap_create_link(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FEATURE_DESCRIPTIONS, tx); } /* * Enable any required dependencies, then enable the requested feature. 
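 * For example, enabling SPA_FEATURE_LARGE_BLOCKS walks its fi_depends
 * list first, which pulls in SPA_FEATURE_EXTENSIBLE_DATASET if that is
 * not enabled already.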
*/ void spa_feature_enable(spa_t *spa, spa_feature_t fid, dmu_tx_t *tx) { ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES); ASSERT(VALID_FEATURE_FID(fid)); feature_enable_sync(spa, &spa_feature_table[fid], tx); } void spa_feature_incr(spa_t *spa, spa_feature_t fid, dmu_tx_t *tx) { feature_do_action(spa, fid, FEATURE_ACTION_INCR, tx); } void spa_feature_decr(spa_t *spa, spa_feature_t fid, dmu_tx_t *tx) { feature_do_action(spa, fid, FEATURE_ACTION_DECR, tx); } boolean_t spa_feature_is_enabled(spa_t *spa, spa_feature_t fid) { int err; uint64_t refcount; ASSERT(VALID_FEATURE_FID(fid)); if (spa_version(spa) < SPA_VERSION_FEATURES) return (B_FALSE); err = feature_get_refcount(spa, &spa_feature_table[fid], &refcount); ASSERT(err == 0 || err == ENOTSUP); return (err == 0); } boolean_t spa_feature_is_active(spa_t *spa, spa_feature_t fid) { int err; uint64_t refcount; ASSERT(VALID_FEATURE_FID(fid)); if (spa_version(spa) < SPA_VERSION_FEATURES) return (B_FALSE); err = feature_get_refcount(spa, &spa_feature_table[fid], &refcount); ASSERT(err == 0 || err == ENOTSUP); return (err == 0 && refcount > 0); } /* * For the feature specified by fid (which must depend on * SPA_FEATURE_ENABLED_TXG), return the TXG at which it was enabled in the * OUT txg argument. * * Returns B_TRUE if the feature is enabled, in which case txg will be filled * with the transaction group in which the specified feature was enabled. * Returns B_FALSE otherwise (i.e. if the feature is not enabled). */ boolean_t spa_feature_enabled_txg(spa_t *spa, spa_feature_t fid, uint64_t *txg) { int err; ASSERT(VALID_FEATURE_FID(fid)); if (spa_version(spa) < SPA_VERSION_FEATURES) return (B_FALSE); err = feature_get_enabled_txg(spa, &spa_feature_table[fid], txg); ASSERT(err == 0 || err == ENOTSUP); return (err == 0); } Index: projects/clang700-import/sys/cddl/contrib/opensolaris =================================================================== --- projects/clang700-import/sys/cddl/contrib/opensolaris (revision 337615) +++ projects/clang700-import/sys/cddl/contrib/opensolaris (revision 337616) Property changes on: projects/clang700-import/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/cddl/contrib/opensolaris:r337586-337615 Index: projects/clang700-import/sys/dev/cxgbe/adapter.h =================================================================== --- projects/clang700-import/sys/dev/cxgbe/adapter.h (revision 337615) +++ projects/clang700-import/sys/dev/cxgbe/adapter.h (revision 337616) @@ -1,1323 +1,1327 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_ADAPTER_H__ #define __T4_ADAPTER_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "offload.h" #include "t4_ioctl.h" #include "common/t4_msg.h" #include "firmware/t4fw_interface.h" #define KTR_CXGBE KTR_SPARE3 MALLOC_DECLARE(M_CXGBE); #define CXGBE_UNIMPLEMENTED(s) \ panic("%s (%s, line %d) not implemented yet.", s, __FILE__, __LINE__) #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } #else #define prefetch(x) __builtin_prefetch(x) #endif #ifndef SYSCTL_ADD_UQUAD #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD #define sysctl_handle_64 sysctl_handle_quad #define CTLTYPE_U64 CTLTYPE_QUAD #endif struct adapter; typedef struct adapter adapter_t; enum { /* * All ingress queues use this entry size. Note that the firmware event * queue and any iq expecting CPL_RX_PKT in the descriptor needs this to * be at least 64. */ IQ_ESIZE = 64, /* Default queue sizes for all kinds of ingress queues */ FW_IQ_QSIZE = 256, RX_IQ_QSIZE = 1024, /* All egress queues use this entry size */ EQ_ESIZE = 64, /* Default queue sizes for all kinds of egress queues */ CTRL_EQ_QSIZE = 128, TX_EQ_QSIZE = 1024, #if MJUMPAGESIZE != MCLBYTES SW_ZONE_SIZES = 4, /* cluster, jumbop, jumbo9k, jumbo16k */ #else SW_ZONE_SIZES = 3, /* cluster, jumbo9k, jumbo16k */ #endif CL_METADATA_SIZE = CACHE_LINE_SIZE, SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */ TX_SGL_SEGS = 39, TX_SGL_SEGS_TSO = 38, TX_WR_FLITS = SGE_MAX_WR_LEN / 8 }; enum { /* adapter intr_type */ INTR_INTX = (1 << 0), INTR_MSI = (1 << 1), INTR_MSIX = (1 << 2) }; enum { XGMAC_MTU = (1 << 0), XGMAC_PROMISC = (1 << 1), XGMAC_ALLMULTI = (1 << 2), XGMAC_VLANEX = (1 << 3), XGMAC_UCADDR = (1 << 4), XGMAC_MCADDRS = (1 << 5), XGMAC_ALL = 0xffff }; enum { /* flags understood by begin_synchronized_op */ HOLD_LOCK = (1 << 0), SLEEP_OK = (1 << 1), INTR_OK = (1 << 2), /* flags understood by end_synchronized_op */ LOCK_HELD = HOLD_LOCK, }; enum { /* adapter flags */ FULL_INIT_DONE = (1 << 0), FW_OK = (1 << 1), CHK_MBOX_ACCESS = (1 << 2), MASTER_PF = (1 << 3), ADAP_SYSCTL_CTX = (1 << 4), /* TOM_INIT_DONE= (1 << 5), No longer used */ BUF_PACKING_OK = (1 << 6), IS_VF = (1 << 7), CXGBE_BUSY = (1 << 9), /* port flags */ HAS_TRACEQ = (1 << 3), FIXED_IFMEDIA = (1 << 4), /* ifmedia list doesn't change. */ /* VI flags */ DOOMED = (1 << 0), VI_INIT_DONE = (1 << 1), VI_SYSCTL_CTX = (1 << 2), /* adapter debug_flags */ DF_DUMP_MBOX = (1 << 0), /* Log all mbox cmd/rpl. 
*/ DF_LOAD_FW_ANYTIME = (1 << 1), /* Allow LOAD_FW after init */ DF_DISABLE_TCB_CACHE = (1 << 2), /* Disable TCB cache (T6+) */ }; #define IS_DOOMED(vi) ((vi)->flags & DOOMED) #define SET_DOOMED(vi) do {(vi)->flags |= DOOMED;} while (0) #define IS_BUSY(sc) ((sc)->flags & CXGBE_BUSY) #define SET_BUSY(sc) do {(sc)->flags |= CXGBE_BUSY;} while (0) #define CLR_BUSY(sc) do {(sc)->flags &= ~CXGBE_BUSY;} while (0) struct vi_info { device_t dev; struct port_info *pi; struct ifnet *ifp; unsigned long flags; int if_flags; uint16_t *rss, *nm_rss; int smt_idx; /* for convenience */ uint16_t viid; int16_t xact_addr_filt;/* index of exact MAC address filter */ uint16_t rss_size; /* size of VI's RSS table slice */ uint16_t rss_base; /* start of VI's RSS table slice */ eventhandler_tag vlan_c; int nintr; int first_intr; /* These need to be int as they are used in sysctl */ int ntxq; /* # of tx queues */ int first_txq; /* index of first tx queue */ int rsrv_noflowq; /* Reserve queue 0 for non-flowid packets */ int nrxq; /* # of rx queues */ int first_rxq; /* index of first rx queue */ int nofldtxq; /* # of offload tx queues */ int first_ofld_txq; /* index of first offload tx queue */ int nofldrxq; /* # of offload rx queues */ int first_ofld_rxq; /* index of first offload rx queue */ int nnmtxq; int first_nm_txq; int nnmrxq; int first_nm_rxq; int tmr_idx; int ofld_tmr_idx; int pktc_idx; int ofld_pktc_idx; int qsize_rxq; int qsize_txq; struct timeval last_refreshed; struct fw_vi_stats_vf stats; struct callout tick; struct sysctl_ctx_list ctx; /* from ifconfig up to driver detach */ uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */ }; struct tx_ch_rl_params { enum fw_sched_params_rate ratemode; /* %port (REL) or kbps (ABS) */ uint32_t maxrate; }; enum { CLRL_USER = (1 << 0), /* allocated manually. */ CLRL_SYNC = (1 << 1), /* sync hw update in progress. */ CLRL_ASYNC = (1 << 2), /* async hw update requested. */ CLRL_ERR = (1 << 3), /* last hw setup ended in error. */ }; struct tx_cl_rl_params { int refcount; uint8_t flags; enum fw_sched_params_rate ratemode; /* %port REL or ABS value */ enum fw_sched_params_unit rateunit; /* kbps or pps (when ABS) */ enum fw_sched_params_mode mode; /* aggr or per-flow */ uint32_t maxrate; uint16_t pktsize; uint16_t burstsize; }; /* Tx scheduler parameters for a channel/port */ struct tx_sched_params { /* Channel Rate Limiter */ struct tx_ch_rl_params ch_rl; /* Class WRR */ /* XXX */ /* Class Rate Limiter (including the default pktsize and burstsize). */ int pktsize; int burstsize; struct tx_cl_rl_params cl_rl[]; }; struct port_info { device_t dev; struct adapter *adapter; struct vi_info *vi; int nvi; int up_vis; int uld_vis; struct tx_sched_params *sched_params; struct mtx pi_lock; char lockname[16]; unsigned long flags; uint8_t lport; /* associated offload logical port */ int8_t mdio_addr; uint8_t port_type; uint8_t mod_type; uint8_t port_id; uint8_t tx_chan; uint8_t mps_bg_map; /* rx MPS buffer group bitmap */ uint8_t rx_e_chan_map; /* rx TP e-channel bitmap */ struct link_config link_cfg; struct link_config old_link_cfg; struct ifmedia media; struct timeval last_refreshed; struct port_stats stats; u_int tnl_cong_drops; u_int tx_parse_error; u_long tx_tls_records; u_long tx_tls_octets; u_long rx_tls_records; u_long rx_tls_octets; struct callout tick; }; #define IS_MAIN_VI(vi) ((vi) == &((vi)->pi->vi[0])) /* Where the cluster came from, how it has been carved up. 
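 * The three regions are laid out consecutively within the cluster:
 * mbufs first (region1), the DMA buffers next (region2, implied), and
 * the cluster_metadata at the tail (region3).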
*/ struct cluster_layout { int8_t zidx; int8_t hwidx; uint16_t region1; /* mbufs laid out within this region */ /* region2 is the DMA region */ uint16_t region3; /* cluster_metadata within this region */ }; struct cluster_metadata { u_int refcount; struct fl_sdesc *sd; /* For debug only. Could easily be stale */ }; struct fl_sdesc { caddr_t cl; uint16_t nmbuf; /* # of driver originated mbufs with ref on cluster */ struct cluster_layout cll; }; struct tx_desc { __be64 flit[8]; }; struct tx_sdesc { struct mbuf *m; /* m_nextpkt linked chain of frames */ uint8_t desc_used; /* # of hardware descriptors used by the WR */ }; #define IQ_PAD (IQ_ESIZE - sizeof(struct rsp_ctrl) - sizeof(struct rss_header)) struct iq_desc { struct rss_header rss; uint8_t cpl[IQ_PAD]; struct rsp_ctrl rsp; }; #undef IQ_PAD CTASSERT(sizeof(struct iq_desc) == IQ_ESIZE); enum { /* iq flags */ IQ_ALLOCATED = (1 << 0), /* firmware resources allocated */ IQ_HAS_FL = (1 << 1), /* iq associated with a freelist */ /* 1 << 2 Used to be IQ_INTR */ IQ_LRO_ENABLED = (1 << 3), /* iq is an eth rxq with LRO enabled */ IQ_ADJ_CREDIT = (1 << 4), /* hw is off by 1 credit for this iq */ /* iq state */ IQS_DISABLED = 0, IQS_BUSY = 1, IQS_IDLE = 2, /* netmap related flags */ NM_OFF = 0, NM_ON = 1, NM_BUSY = 2, }; enum { CPL_COOKIE_RESERVED = 0, CPL_COOKIE_FILTER, CPL_COOKIE_DDP0, CPL_COOKIE_DDP1, CPL_COOKIE_TOM, CPL_COOKIE_HASHFILTER, CPL_COOKIE_ETHOFLD, CPL_COOKIE_AVAILABLE3, NUM_CPL_COOKIES = 8 /* Limited by M_COOKIE. Do not increase. */ }; struct sge_iq; struct rss_header; typedef int (*cpl_handler_t)(struct sge_iq *, const struct rss_header *, struct mbuf *); typedef int (*an_handler_t)(struct sge_iq *, const struct rsp_ctrl *); typedef int (*fw_msg_handler_t)(struct adapter *, const __be64 *); /* * Ingress Queue: T4 is producer, driver is consumer. */ struct sge_iq { uint32_t flags; volatile int state; struct adapter *adapter; struct iq_desc *desc; /* KVA of descriptor ring */ int8_t intr_pktc_idx; /* packet count threshold index */ uint8_t gen; /* generation bit */ uint8_t intr_params; /* interrupt holdoff parameters */ uint8_t intr_next; /* XXX: holdoff for next interrupt */ uint16_t qsize; /* size (# of entries) of the queue */ uint16_t sidx; /* index of the entry with the status page */ uint16_t cidx; /* consumer index */ uint16_t cntxt_id; /* SGE context id for the iq */ uint16_t abs_id; /* absolute SGE id for the iq */ STAILQ_ENTRY(sge_iq) link; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; /* bus address of descriptor ring */ }; enum { EQ_CTRL = 1, EQ_ETH = 2, EQ_OFLD = 3, /* eq flags */ EQ_TYPEMASK = 0x3, /* 2 lsbits hold the type (see above) */ EQ_ALLOCATED = (1 << 2), /* firmware resources allocated */ EQ_ENABLED = (1 << 3), /* open for business */ EQ_QFLUSH = (1 << 4), /* if_qflush in progress */ }; /* Listed in order of preference. Update t4_sysctls too if you change these */ enum {DOORBELL_UDB, DOORBELL_WCWR, DOORBELL_UDBWC, DOORBELL_KDB}; /* * Egress Queue: driver is producer, T4 is consumer. * * Note: A free list is an egress queue (driver produces the buffers and T4 * consumes them) but it's special enough to have its own struct (see sge_fl). 
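 * The ring indices below (cidx, pidx, dbidx) wrap, and distances between
 * them are computed with the IDXDIFF()/IDXINCR() helpers defined later in
 * this file.  For example, for a 1024-entry ring with head = 10 and
 * tail = 1000, IDXDIFF(10, 1000, 1024) = 1024 - 1000 + 10 = 34 entries
 * outstanding.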
*/ struct sge_eq { unsigned int flags; /* MUST be first */ unsigned int cntxt_id; /* SGE context id for the eq */ unsigned int abs_id; /* absolute SGE id for the eq */ struct mtx eq_lock; struct tx_desc *desc; /* KVA of descriptor ring */ uint8_t doorbells; volatile uint32_t *udb; /* KVA of doorbell (lies within BAR2) */ u_int udb_qid; /* relative qid within the doorbell page */ uint16_t sidx; /* index of the entry with the status page */ uint16_t cidx; /* consumer idx (desc idx) */ uint16_t pidx; /* producer idx (desc idx) */ uint16_t equeqidx; /* EQUEQ last requested at this pidx */ uint16_t dbidx; /* pidx of the most recent doorbell */ uint16_t iqid; /* iq that gets egr_update for the eq */ uint8_t tx_chan; /* tx channel used by the eq */ volatile u_int equiq; /* EQUIQ outstanding */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; /* bus address of descriptor ring */ char lockname[16]; }; struct sw_zone_info { uma_zone_t zone; /* zone that this cluster comes from */ int size; /* size of cluster: 2K, 4K, 9K, 16K, etc. */ int type; /* EXT_xxx type of the cluster */ int8_t head_hwidx; int8_t tail_hwidx; }; struct hw_buf_info { int8_t zidx; /* backpointer to zone; -ve means unused */ int8_t next; /* next hwidx for this zone; -1 means no more */ int size; }; enum { NUM_MEMWIN = 3, MEMWIN0_APERTURE = 2048, MEMWIN0_BASE = 0x1b800, MEMWIN1_APERTURE = 32768, MEMWIN1_BASE = 0x28000, MEMWIN2_APERTURE_T4 = 65536, MEMWIN2_BASE_T4 = 0x30000, MEMWIN2_APERTURE_T5 = 128 * 1024, MEMWIN2_BASE_T5 = 0x60000, }; struct memwin { struct rwlock mw_lock __aligned(CACHE_LINE_SIZE); uint32_t mw_base; /* constant after setup_memwin */ uint32_t mw_aperture; /* ditto */ uint32_t mw_curpos; /* protected by mw_lock */ }; enum { FL_STARVING = (1 << 0), /* on the adapter's list of starving fl's */ FL_DOOMED = (1 << 1), /* about to be destroyed */ FL_BUF_PACKING = (1 << 2), /* buffer packing enabled */ FL_BUF_RESUME = (1 << 3), /* resume from the middle of the frame */ }; #define FL_RUNNING_LOW(fl) \ (IDXDIFF(fl->dbidx * 8, fl->cidx, fl->sidx * 8) <= fl->lowat) #define FL_NOT_RUNNING_LOW(fl) \ (IDXDIFF(fl->dbidx * 8, fl->cidx, fl->sidx * 8) >= 2 * fl->lowat) struct sge_fl { struct mtx fl_lock; __be64 *desc; /* KVA of descriptor ring, ptr to addresses */ struct fl_sdesc *sdesc; /* KVA of software descriptor ring */ struct cluster_layout cll_def; /* default refill zone, layout */ uint16_t lowat; /* # of buffers <= this means fl needs help */ int flags; uint16_t buf_boundary; /* The 16b idx all deal with hw descriptors */ uint16_t dbidx; /* hw pidx after last doorbell */ uint16_t sidx; /* index of status page */ volatile uint16_t hw_cidx; /* The 32b idx are all buffer idx, not hardware descriptor idx */ uint32_t cidx; /* consumer index */ uint32_t pidx; /* producer index */ uint32_t dbval; u_int rx_offset; /* offset in fl buf (when buffer packing) */ volatile uint32_t *udb; uint64_t mbuf_allocated;/* # of mbuf allocated from zone_mbuf */ uint64_t mbuf_inlined; /* # of mbuf created within clusters */ uint64_t cl_allocated; /* # of clusters allocated */ uint64_t cl_recycled; /* # of clusters recycled */ uint64_t cl_fast_recycled; /* # of clusters recycled (fast) */ /* These 3 are valid when FL_BUF_RESUME is set, stale otherwise. 
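 * They record a frame that was left partially assembled when a service
 * run had to stop mid-frame, so the next run can resume where it left
 * off (see FL_BUF_RESUME above).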
*/ struct mbuf *m0; struct mbuf **pnext; u_int remaining; uint16_t qsize; /* # of hw descriptors (status page included) */ uint16_t cntxt_id; /* SGE context id for the freelist */ TAILQ_ENTRY(sge_fl) link; /* All starving freelists */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; char lockname[16]; bus_addr_t ba; /* bus address of descriptor ring */ struct cluster_layout cll_alt; /* alternate refill zone, layout */ }; struct mp_ring; /* txq: SGE egress queue + what's needed for Ethernet NIC */ struct sge_txq { struct sge_eq eq; /* MUST be first */ struct ifnet *ifp; /* the interface this txq belongs to */ struct mp_ring *r; /* tx software ring */ struct tx_sdesc *sdesc; /* KVA of software descriptor ring */ struct sglist *gl; __be32 cpl_ctrl0; /* for convenience */ int tc_idx; /* traffic class */ struct task tx_reclaim_task; /* stats for common events first */ uint64_t txcsum; /* # of times hardware assisted with checksum */ uint64_t tso_wrs; /* # of TSO work requests */ uint64_t vlan_insertion;/* # of times VLAN tag was inserted */ uint64_t imm_wrs; /* # of work requests with immediate data */ uint64_t sgl_wrs; /* # of work requests with direct SGL */ uint64_t txpkt_wrs; /* # of txpkt work requests (not coalesced) */ uint64_t txpkts0_wrs; /* # of type0 coalesced tx work requests */ uint64_t txpkts1_wrs; /* # of type1 coalesced tx work requests */ uint64_t txpkts0_pkts; /* # of frames in type0 coalesced tx WRs */ uint64_t txpkts1_pkts; /* # of frames in type1 coalesced tx WRs */ /* stats for not-that-common events */ } __aligned(CACHE_LINE_SIZE); /* rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_rxq { struct sge_iq iq; /* MUST be first */ struct sge_fl fl; /* MUST follow iq */ struct ifnet *ifp; /* the interface this rxq belongs to */ #if defined(INET) || defined(INET6) struct lro_ctrl lro; /* LRO state */ #endif /* stats for common events first */ uint64_t rxcsum; /* # of times hardware assisted with checksum */ uint64_t vlan_extraction;/* # of times VLAN tag was extracted */ /* stats for not-that-common events */ } __aligned(CACHE_LINE_SIZE); static inline struct sge_rxq * iq_to_rxq(struct sge_iq *iq) { return (__containerof(iq, struct sge_rxq, iq)); } /* ofld_rxq: SGE ingress queue + SGE free list + miscellaneous items */ struct sge_ofld_rxq { struct sge_iq iq; /* MUST be first */ struct sge_fl fl; /* MUST follow iq */ } __aligned(CACHE_LINE_SIZE); static inline struct sge_ofld_rxq * iq_to_ofld_rxq(struct sge_iq *iq) { return (__containerof(iq, struct sge_ofld_rxq, iq)); } struct wrqe { STAILQ_ENTRY(wrqe) link; struct sge_wrq *wrq; int wr_len; char wr[] __aligned(16); }; struct wrq_cookie { TAILQ_ENTRY(wrq_cookie) link; int ndesc; int pidx; }; /* * wrq: SGE egress queue that is given prebuilt work requests. Both the control * and offload tx queues are of this type. */ struct sge_wrq { struct sge_eq eq; /* MUST be first */ struct adapter *adapter; struct task wrq_tx_task; /* Tx desc reserved but WR not "committed" yet. */ TAILQ_HEAD(wrq_incomplete_wrs , wrq_cookie) incomplete_wrs; /* List of WRs ready to go out as soon as descriptors are available. */ STAILQ_HEAD(, wrqe) wr_list; u_int nwr_pending; u_int ndesc_needed; /* stats for common events first */ uint64_t tx_wrs_direct; /* # of WRs written directly to desc ring. */ uint64_t tx_wrs_ss; /* # of WRs copied from scratch space. */ uint64_t tx_wrs_copied; /* # of WRs queued and copied to desc ring. 
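 * (Of the three WR counters above: tx_wrs_direct counts WRs built in
 * place in the descriptor ring, tx_wrs_ss counts WRs that wrapped past
 * the status page and were staged in the scratch space below, and
 * tx_wrs_copied counts WRs queued on wr_list until descriptors freed up.)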
*/ /* stats for not-that-common events */ /* * Scratch space for work requests that wrap around after reaching the * status page, and some information about the last WR that used it. */ uint16_t ss_pidx; uint16_t ss_len; uint8_t ss[SGE_MAX_WR_LEN]; } __aligned(CACHE_LINE_SIZE); #define INVALID_NM_RXQ_CNTXT_ID ((uint16_t)(-1)) struct sge_nm_rxq { + volatile int nm_state; /* NM_OFF, NM_ON, or NM_BUSY */ struct vi_info *vi; struct iq_desc *iq_desc; uint16_t iq_abs_id; uint16_t iq_cntxt_id; uint16_t iq_cidx; uint16_t iq_sidx; uint8_t iq_gen; __be64 *fl_desc; uint16_t fl_cntxt_id; uint32_t fl_cidx; uint32_t fl_pidx; uint32_t fl_sidx; uint32_t fl_db_val; u_int fl_hwidx:4; u_int fl_db_saved; u_int nid; /* netmap ring # for this queue */ /* infrequently used items after this */ bus_dma_tag_t iq_desc_tag; bus_dmamap_t iq_desc_map; bus_addr_t iq_ba; int intr_idx; bus_dma_tag_t fl_desc_tag; bus_dmamap_t fl_desc_map; bus_addr_t fl_ba; } __aligned(CACHE_LINE_SIZE); #define INVALID_NM_TXQ_CNTXT_ID ((u_int)(-1)) struct sge_nm_txq { struct tx_desc *desc; uint16_t cidx; uint16_t pidx; uint16_t sidx; uint16_t equiqidx; /* EQUIQ last requested at this pidx */ uint16_t equeqidx; /* EQUEQ last requested at this pidx */ uint16_t dbidx; /* pidx of the most recent doorbell */ uint8_t doorbells; volatile uint32_t *udb; u_int udb_qid; u_int cntxt_id; __be32 cpl_ctrl0; /* for convenience */ u_int nid; /* netmap ring # for this queue */ /* infrequently used items after this */ bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_addr_t ba; int iqidx; } __aligned(CACHE_LINE_SIZE); struct sge { int nrxq; /* total # of Ethernet rx queues */ int ntxq; /* total # of Ethernet tx queues */ int nofldrxq; /* total # of TOE rx queues */ int nofldtxq; /* total # of TOE tx queues */ int nnmrxq; /* total # of netmap rx queues */ int nnmtxq; /* total # of netmap tx queues */ int niq; /* total # of ingress queues */ int neq; /* total # of egress queues */ struct sge_iq fwq; /* Firmware event queue */ struct sge_wrq mgmtq; /* Management queue (control queue) */ struct sge_wrq *ctrlq; /* Control queues */ struct sge_txq *txq; /* NIC tx queues */ struct sge_rxq *rxq; /* NIC rx queues */ struct sge_wrq *ofld_txq; /* TOE tx queues */ struct sge_ofld_rxq *ofld_rxq; /* TOE rx queues */ struct sge_nm_txq *nm_txq; /* netmap tx queues */ struct sge_nm_rxq *nm_rxq; /* netmap rx queues */ uint16_t iq_start; /* first cntxt_id */ uint16_t iq_base; /* first abs_id */ int eq_start; /* first cntxt_id */ int eq_base; /* first abs_id */ struct sge_iq **iqmap; /* iq->cntxt_id to iq mapping */ struct sge_eq **eqmap; /* eq->cntxt_id to eq mapping */ int8_t safe_hwidx1; /* may not have room for metadata */ int8_t safe_hwidx2; /* with room for metadata and maybe more */ struct sw_zone_info sw_zone_info[SW_ZONE_SIZES]; struct hw_buf_info hw_buf_info[SGE_FLBUF_SIZES]; }; struct devnames { const char *nexus_name; const char *ifnet_name; const char *vi_ifnet_name; const char *pf03_drv_name; const char *vf_nexus_name; const char *vf_ifnet_name; }; struct adapter { SLIST_ENTRY(adapter) link; device_t dev; struct cdev *cdev; const struct devnames *names; /* PCIe register resources */ int regs_rid; struct resource *regs_res; int msix_rid; struct resource *msix_res; bus_space_handle_t bh; bus_space_tag_t bt; bus_size_t mmio_len; int udbs_rid; struct resource *udbs_res; volatile uint8_t *udbs_base; unsigned int pf; unsigned int mbox; unsigned int vpd_busy; unsigned int vpd_flag; /* Interrupt information */ int intr_type; int intr_count; struct irq { struct resource 
*res; int rid; - volatile int nm_state; /* NM_OFF, NM_ON, or NM_BUSY */ void *tag; struct sge_rxq *rxq; struct sge_nm_rxq *nm_rxq; } __aligned(CACHE_LINE_SIZE) *irq; int sge_gts_reg; int sge_kdoorbell_reg; bus_dma_tag_t dmat; /* Parent DMA tag */ struct sge sge; int lro_timeout; int sc_do_rxcopy; struct taskqueue *tq[MAX_NCHAN]; /* General purpose taskqueues */ struct port_info *port[MAX_NPORTS]; uint8_t chan_map[MAX_NCHAN]; /* channel -> port */ void *tom_softc; /* (struct tom_data *) */ struct tom_tunables tt; struct t4_offload_policy *policy; struct rwlock policy_lock; void *iwarp_softc; /* (struct c4iw_dev *) */ struct iw_tunables iwt; void *iscsi_ulp_softc; /* (struct cxgbei_data *) */ void *ccr_softc; /* (struct ccr_softc *) */ struct l2t_data *l2t; /* L2 table */ struct smt_data *smt; /* Source MAC Table */ struct tid_info tids; uint8_t doorbells; int offload_map; /* ports with IFCAP_TOE enabled */ int active_ulds; /* ULDs activated on this adapter */ int flags; int debug_flags; char ifp_lockname[16]; struct mtx ifp_lock; struct ifnet *ifp; /* tracer ifp */ struct ifmedia media; int traceq; /* iq used by all tracers, -1 if none */ int tracer_valid; /* bitmap of valid tracers */ int tracer_enabled; /* bitmap of enabled tracers */ char fw_version[16]; char tp_version[16]; char er_version[16]; char bs_version[16]; char cfg_file[32]; u_int cfcsum; struct adapter_params params; const struct chip_params *chip_params; struct t4_virt_res vres; uint16_t nbmcaps; uint16_t linkcaps; uint16_t switchcaps; uint16_t niccaps; uint16_t toecaps; uint16_t rdmacaps; uint16_t cryptocaps; uint16_t iscsicaps; uint16_t fcoecaps; struct sysctl_ctx_list ctx; /* from adapter_full_init to full_uninit */ struct mtx sc_lock; char lockname[16]; /* Starving free lists */ struct mtx sfl_lock; /* same cache-line as sc_lock? 
but that's ok */ TAILQ_HEAD(, sge_fl) sfl; struct callout sfl_callout; struct mtx reg_lock; /* for indirect register access */ struct memwin memwin[NUM_MEMWIN]; /* memory windows */ struct mtx tc_lock; struct task tc_task; const char *last_op; const void *last_op_thr; int last_op_flags; }; #define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock) #define ADAPTER_UNLOCK(sc) mtx_unlock(&(sc)->sc_lock) #define ADAPTER_LOCK_ASSERT_OWNED(sc) mtx_assert(&(sc)->sc_lock, MA_OWNED) #define ADAPTER_LOCK_ASSERT_NOTOWNED(sc) mtx_assert(&(sc)->sc_lock, MA_NOTOWNED) #define ASSERT_SYNCHRONIZED_OP(sc) \ KASSERT(IS_BUSY(sc) && \ (mtx_owned(&(sc)->sc_lock) || sc->last_op_thr == curthread), \ ("%s: operation not synchronized.", __func__)) #define PORT_LOCK(pi) mtx_lock(&(pi)->pi_lock) #define PORT_UNLOCK(pi) mtx_unlock(&(pi)->pi_lock) #define PORT_LOCK_ASSERT_OWNED(pi) mtx_assert(&(pi)->pi_lock, MA_OWNED) #define PORT_LOCK_ASSERT_NOTOWNED(pi) mtx_assert(&(pi)->pi_lock, MA_NOTOWNED) #define FL_LOCK(fl) mtx_lock(&(fl)->fl_lock) #define FL_TRYLOCK(fl) mtx_trylock(&(fl)->fl_lock) #define FL_UNLOCK(fl) mtx_unlock(&(fl)->fl_lock) #define FL_LOCK_ASSERT_OWNED(fl) mtx_assert(&(fl)->fl_lock, MA_OWNED) #define FL_LOCK_ASSERT_NOTOWNED(fl) mtx_assert(&(fl)->fl_lock, MA_NOTOWNED) #define RXQ_FL_LOCK(rxq) FL_LOCK(&(rxq)->fl) #define RXQ_FL_UNLOCK(rxq) FL_UNLOCK(&(rxq)->fl) #define RXQ_FL_LOCK_ASSERT_OWNED(rxq) FL_LOCK_ASSERT_OWNED(&(rxq)->fl) #define RXQ_FL_LOCK_ASSERT_NOTOWNED(rxq) FL_LOCK_ASSERT_NOTOWNED(&(rxq)->fl) #define EQ_LOCK(eq) mtx_lock(&(eq)->eq_lock) #define EQ_TRYLOCK(eq) mtx_trylock(&(eq)->eq_lock) #define EQ_UNLOCK(eq) mtx_unlock(&(eq)->eq_lock) #define EQ_LOCK_ASSERT_OWNED(eq) mtx_assert(&(eq)->eq_lock, MA_OWNED) #define EQ_LOCK_ASSERT_NOTOWNED(eq) mtx_assert(&(eq)->eq_lock, MA_NOTOWNED) #define TXQ_LOCK(txq) EQ_LOCK(&(txq)->eq) #define TXQ_TRYLOCK(txq) EQ_TRYLOCK(&(txq)->eq) #define TXQ_UNLOCK(txq) EQ_UNLOCK(&(txq)->eq) #define TXQ_LOCK_ASSERT_OWNED(txq) EQ_LOCK_ASSERT_OWNED(&(txq)->eq) #define TXQ_LOCK_ASSERT_NOTOWNED(txq) EQ_LOCK_ASSERT_NOTOWNED(&(txq)->eq) #define CH_DUMP_MBOX(sc, mbox, data_reg) \ do { \ if (sc->debug_flags & DF_DUMP_MBOX) { \ log(LOG_NOTICE, \ "%s mbox %u: %016llx %016llx %016llx %016llx " \ "%016llx %016llx %016llx %016llx\n", \ device_get_nameunit(sc->dev), mbox, \ (unsigned long long)t4_read_reg64(sc, data_reg), \ (unsigned long long)t4_read_reg64(sc, data_reg + 8), \ (unsigned long long)t4_read_reg64(sc, data_reg + 16), \ (unsigned long long)t4_read_reg64(sc, data_reg + 24), \ (unsigned long long)t4_read_reg64(sc, data_reg + 32), \ (unsigned long long)t4_read_reg64(sc, data_reg + 40), \ (unsigned long long)t4_read_reg64(sc, data_reg + 48), \ (unsigned long long)t4_read_reg64(sc, data_reg + 56)); \ } \ } while (0) #define for_each_txq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.txq[vi->first_txq], iter = 0; \ iter < vi->ntxq; ++iter, ++q) #define for_each_rxq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.rxq[vi->first_rxq], iter = 0; \ iter < vi->nrxq; ++iter, ++q) #define for_each_ofld_txq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.ofld_txq[vi->first_ofld_txq], iter = 0; \ iter < vi->nofldtxq; ++iter, ++q) #define for_each_ofld_rxq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.ofld_rxq[vi->first_ofld_rxq], iter = 0; \ iter < vi->nofldrxq; ++iter, ++q) #define for_each_nm_txq(vi, iter, q) \ for (q = &vi->pi->adapter->sge.nm_txq[vi->first_nm_txq], iter = 0; \ iter < vi->nnmtxq; ++iter, ++q) #define for_each_nm_rxq(vi, iter, q) \ for (q = 
&vi->pi->adapter->sge.nm_rxq[vi->first_nm_rxq], iter = 0; \ iter < vi->nnmrxq; ++iter, ++q) #define for_each_vi(_pi, _iter, _vi) \ for ((_vi) = (_pi)->vi, (_iter) = 0; (_iter) < (_pi)->nvi; \ ++(_iter), ++(_vi)) #define IDXINCR(idx, incr, wrap) do { \ idx = wrap - idx > incr ? idx + incr : incr - (wrap - idx); \ } while (0) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) /* One for errors, one for firmware events */ #define T4_EXTRA_INTR 2 /* One for firmware events */ #define T4VF_EXTRA_INTR 1 static inline int forwarding_intr_to_fwq(struct adapter *sc) { return (sc->intr_count == 1); } static inline uint32_t t4_read_reg(struct adapter *sc, uint32_t reg) { return bus_space_read_4(sc->bt, sc->bh, reg); } static inline void t4_write_reg(struct adapter *sc, uint32_t reg, uint32_t val) { bus_space_write_4(sc->bt, sc->bh, reg, val); } static inline uint64_t t4_read_reg64(struct adapter *sc, uint32_t reg) { #ifdef __LP64__ return bus_space_read_8(sc->bt, sc->bh, reg); #else return (uint64_t)bus_space_read_4(sc->bt, sc->bh, reg) + ((uint64_t)bus_space_read_4(sc->bt, sc->bh, reg + 4) << 32); #endif } static inline void t4_write_reg64(struct adapter *sc, uint32_t reg, uint64_t val) { #ifdef __LP64__ bus_space_write_8(sc->bt, sc->bh, reg, val); #else bus_space_write_4(sc->bt, sc->bh, reg, val); bus_space_write_4(sc->bt, sc->bh, reg + 4, val >> 32); #endif } static inline void t4_os_pci_read_cfg1(struct adapter *sc, int reg, uint8_t *val) { *val = pci_read_config(sc->dev, reg, 1); } static inline void t4_os_pci_write_cfg1(struct adapter *sc, int reg, uint8_t val) { pci_write_config(sc->dev, reg, val, 1); } static inline void t4_os_pci_read_cfg2(struct adapter *sc, int reg, uint16_t *val) { *val = pci_read_config(sc->dev, reg, 2); } static inline void t4_os_pci_write_cfg2(struct adapter *sc, int reg, uint16_t val) { pci_write_config(sc->dev, reg, val, 2); } static inline void t4_os_pci_read_cfg4(struct adapter *sc, int reg, uint32_t *val) { *val = pci_read_config(sc->dev, reg, 4); } static inline void t4_os_pci_write_cfg4(struct adapter *sc, int reg, uint32_t val) { pci_write_config(sc->dev, reg, val, 4); } static inline struct port_info * adap2pinfo(struct adapter *sc, int idx) { return (sc->port[idx]); } static inline void t4_os_set_hw_addr(struct port_info *pi, uint8_t hw_addr[]) { bcopy(hw_addr, pi->vi[0].hw_addr, ETHER_ADDR_LEN); } static inline bool is_10G_port(const struct port_info *pi) { return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) != 0); } static inline bool is_25G_port(const struct port_info *pi) { return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G) != 0); } static inline bool is_40G_port(const struct port_info *pi) { return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G) != 0); } static inline bool is_100G_port(const struct port_info *pi) { return ((pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G) != 0); } static inline int port_top_speed(const struct port_info *pi) { if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G) return (100); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G) return (40); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G) return (25); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) return (10); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_1G) return (1); return (0); } static inline int tx_resume_threshold(struct sge_eq *eq) { /* not quite the same as qsize / 4, but this will do.
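[Illustrative aside, not part of this changeset: the IDXINCR and IDXDIFF macros defined a little earlier implement wrap-around index arithmetic for hardware descriptor rings without a modulo operation; they assume incr never exceeds wrap. A minimal standalone sketch of their behavior, compilable outside the driver:

	#include <assert.h>
	#include <stdio.h>

	#define IDXINCR(idx, incr, wrap) do {				\
		idx = wrap - idx > incr ? idx + incr : incr - (wrap - idx); \
	} while (0)
	#define IDXDIFF(head, tail, wrap) \
		((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head))

	int
	main(void)
	{
		unsigned int head = 500, tail = 500, wrap = 512;

		IDXINCR(head, 20, wrap);	// 500 + 20 wraps around to 8
		assert(head == 8);
		// 20 descriptors outstanding between tail and head.
		assert(IDXDIFF(head, tail, wrap) == 20);
		printf("head=%u, in flight=%u\n", head,
		    IDXDIFF(head, tail, wrap));
		return (0);
	}

End of aside.]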
*/ return (eq->sidx / 4); } static inline int t4_use_ldst(struct adapter *sc) { #ifdef notyet return (sc->flags & FW_OK || !sc->use_bd); #else return (0); #endif } /* t4_main.c */ extern int t4_ntxq; extern int t4_nrxq; extern int t4_intr_types; extern int t4_tmr_idx; extern int t4_pktc_idx; extern unsigned int t4_qsize_rxq; extern unsigned int t4_qsize_txq; extern device_method_t cxgbe_methods[]; int t4_os_find_pci_capability(struct adapter *, int); int t4_os_pci_save_state(struct adapter *); int t4_os_pci_restore_state(struct adapter *); void t4_os_portmod_changed(struct port_info *); void t4_os_link_changed(struct port_info *); void t4_iterate(void (*)(struct adapter *, void *), void *); void t4_init_devnames(struct adapter *); void t4_add_adapter(struct adapter *); void t4_aes_getdeckey(void *, const void *, unsigned int); int t4_detach_common(device_t); int t4_map_bars_0_and_4(struct adapter *); int t4_map_bar_2(struct adapter *); int t4_setup_intr_handlers(struct adapter *); void t4_sysctls(struct adapter *); int begin_synchronized_op(struct adapter *, struct vi_info *, int, char *); void doom_vi(struct adapter *, struct vi_info *); void end_synchronized_op(struct adapter *, int); int update_mac_settings(struct ifnet *, int); int adapter_full_init(struct adapter *); int adapter_full_uninit(struct adapter *); uint64_t cxgbe_get_counter(struct ifnet *, ift_counter); int vi_full_init(struct vi_info *); int vi_full_uninit(struct vi_info *); void vi_sysctls(struct vi_info *); void vi_tick(void *); int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int); int alloc_atid_tab(struct tid_info *, int); void free_atid_tab(struct tid_info *); int alloc_atid(struct adapter *, void *); void *lookup_atid(struct adapter *, int); void free_atid(struct adapter *, int); void release_tid(struct adapter *, int, struct sge_wrq *); #ifdef DEV_NETMAP /* t4_netmap.c */ +struct sge_nm_rxq; void cxgbe_nm_attach(struct vi_info *); void cxgbe_nm_detach(struct vi_info *); -void t4_nm_intr(void *); +void service_nm_rxq(struct sge_nm_rxq *); #endif /* t4_sge.c */ void t4_sge_modload(void); void t4_sge_modunload(void); uint64_t t4_sge_extfree_refs(void); void t4_tweak_chip_settings(struct adapter *); int t4_read_chip_settings(struct adapter *); int t4_create_dma_tag(struct adapter *); void t4_sge_sysctls(struct adapter *, struct sysctl_ctx_list *, struct sysctl_oid_list *); int t4_destroy_dma_tag(struct adapter *); int t4_setup_adapter_queues(struct adapter *); int t4_teardown_adapter_queues(struct adapter *); int t4_setup_vi_queues(struct vi_info *); int t4_teardown_vi_queues(struct vi_info *); void t4_intr_all(void *); void t4_intr(void *); +#ifdef DEV_NETMAP +void t4_nm_intr(void *); void t4_vi_intr(void *); +#endif void t4_intr_err(void *); void t4_intr_evt(void *); void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *); void t4_update_fl_bufsize(struct ifnet *); int parse_pkt(struct adapter *, struct mbuf **); void *start_wrq_wr(struct sge_wrq *, int, struct wrq_cookie *); void commit_wrq_wr(struct sge_wrq *, void *, struct wrq_cookie *); int tnl_cong(struct port_info *, int); void t4_register_an_handler(an_handler_t); void t4_register_fw_msg_handler(int, fw_msg_handler_t); void t4_register_cpl_handler(int, cpl_handler_t); void t4_register_shared_cpl_handler(int, cpl_handler_t, int); #ifdef RATELIMIT int ethofld_transmit(struct ifnet *, struct mbuf *); void send_etid_flush_wr(struct cxgbe_snd_tag *); #endif /* t4_tracer.c */ struct t4_tracer; void t4_tracer_modload(void); void 
t4_tracer_modunload(void); void t4_tracer_port_detach(struct adapter *); int t4_get_tracer(struct adapter *, struct t4_tracer *); int t4_set_tracer(struct adapter *, struct t4_tracer *); int t4_trace_pkt(struct sge_iq *, const struct rss_header *, struct mbuf *); int t5_trace_pkt(struct sge_iq *, const struct rss_header *, struct mbuf *); /* t4_sched.c */ int t4_set_sched_class(struct adapter *, struct t4_sched_params *); int t4_set_sched_queue(struct adapter *, struct t4_sched_queue *); int t4_init_tx_sched(struct adapter *); int t4_free_tx_sched(struct adapter *); void t4_update_tx_sched(struct adapter *); int t4_reserve_cl_rl_kbps(struct adapter *, int, u_int, int *); void t4_release_cl_rl(struct adapter *, int, int); int sysctl_tc(SYSCTL_HANDLER_ARGS); int sysctl_tc_params(SYSCTL_HANDLER_ARGS); #ifdef RATELIMIT void t4_init_etid_table(struct adapter *); void t4_free_etid_table(struct adapter *); struct cxgbe_snd_tag *lookup_etid(struct adapter *, int); int cxgbe_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); int cxgbe_snd_tag_modify(struct m_snd_tag *, union if_snd_tag_modify_params *); int cxgbe_snd_tag_query(struct m_snd_tag *, union if_snd_tag_query_params *); void cxgbe_snd_tag_free(struct m_snd_tag *); void cxgbe_snd_tag_free_locked(struct cxgbe_snd_tag *); #endif /* t4_filter.c */ int get_filter_mode(struct adapter *, uint32_t *); int set_filter_mode(struct adapter *, uint32_t); int get_filter(struct adapter *, struct t4_filter *); int set_filter(struct adapter *, struct t4_filter *); int del_filter(struct adapter *, struct t4_filter *); int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); int t4_hashfilter_ao_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); int t4_hashfilter_tcb_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); int t4_del_hashfilter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); void free_hftid_tab(struct tid_info *); static inline struct wrqe * alloc_wrqe(int wr_len, struct sge_wrq *wrq) { int len = offsetof(struct wrqe, wr) + wr_len; struct wrqe *wr; wr = malloc(len, M_CXGBE, M_NOWAIT); if (__predict_false(wr == NULL)) return (NULL); wr->wr_len = wr_len; wr->wrq = wrq; return (wr); } static inline void * wrtod(struct wrqe *wr) { return (&wr->wr[0]); } static inline void free_wrqe(struct wrqe *wr) { free(wr, M_CXGBE); } static inline void t4_wrq_tx(struct adapter *sc, struct wrqe *wr) { struct sge_wrq *wrq = wr->wrq; TXQ_LOCK(wrq); t4_wrq_tx_locked(sc, wrq, wr); TXQ_UNLOCK(wrq); } static inline int read_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, val, len, 0)); } static inline int write_via_memwin(struct adapter *sc, int idx, uint32_t addr, const uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, (void *)(uintptr_t)val, len, 1)); } #endif Index: projects/clang700-import/sys/dev/cxgbe/t4_main.c =================================================================== --- projects/clang700-import/sys/dev/cxgbe/t4_main.c (revision 337615) +++ projects/clang700-import/sys/dev/cxgbe/t4_main.c (revision 337616) @@ -1,10353 +1,10370 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ratelimit.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #if defined(__i386__) || defined(__amd64__) #include #include #include #include #endif #include #ifdef DDB #include #include #endif #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "cudbg/cudbg.h" #include "t4_ioctl.h" #include "t4_l2t.h" #include "t4_mp_ring.h" #include "t4_if.h" #include "t4_smt.h" /* T4 bus driver interface */ static int t4_probe(device_t); static int t4_attach(device_t); static int t4_detach(device_t); static int t4_ready(device_t); static int t4_read_port_device(device_t, int, device_t *); static device_method_t t4_methods[] = { DEVMETHOD(device_probe, t4_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t4_driver = { "t4nex", t4_methods, sizeof(struct adapter) }; /* T4 port (cxgbe) interface */ static int cxgbe_probe(device_t); static int cxgbe_attach(device_t); static int cxgbe_detach(device_t); device_method_t cxgbe_methods[] = { DEVMETHOD(device_probe, cxgbe_probe), DEVMETHOD(device_attach, cxgbe_attach), DEVMETHOD(device_detach, cxgbe_detach), { 0, 0 } }; static driver_t cxgbe_driver = { "cxgbe", cxgbe_methods, sizeof(struct port_info) }; /* T4 VI (vcxgbe) interface */ static int vcxgbe_probe(device_t); static int vcxgbe_attach(device_t); static int vcxgbe_detach(device_t); static device_method_t vcxgbe_methods[] = { DEVMETHOD(device_probe, vcxgbe_probe), DEVMETHOD(device_attach, vcxgbe_attach), DEVMETHOD(device_detach, vcxgbe_detach), { 0, 0 } }; static driver_t vcxgbe_driver = { "vcxgbe", vcxgbe_methods, sizeof(struct vi_info) }; static d_ioctl_t t4_ioctl; static struct cdevsw t4_cdevsw = { .d_version = D_VERSION, .d_ioctl = t4_ioctl, .d_name = "t4nex", }; /* T5 bus driver interface */ static int t5_probe(device_t); static device_method_t t5_methods[] = { DEVMETHOD(device_probe, t5_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), 
DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t5_driver = { "t5nex", t5_methods, sizeof(struct adapter) }; /* T5 port (cxl) interface */ static driver_t cxl_driver = { "cxl", cxgbe_methods, sizeof(struct port_info) }; /* T5 VI (vcxl) interface */ static driver_t vcxl_driver = { "vcxl", vcxgbe_methods, sizeof(struct vi_info) }; /* T6 bus driver interface */ static int t6_probe(device_t); static device_method_t t6_methods[] = { DEVMETHOD(device_probe, t6_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t6_driver = { "t6nex", t6_methods, sizeof(struct adapter) }; /* T6 port (cc) interface */ static driver_t cc_driver = { "cc", cxgbe_methods, sizeof(struct port_info) }; /* T6 VI (vcc) interface */ static driver_t vcc_driver = { "vcc", vcxgbe_methods, sizeof(struct vi_info) }; /* ifnet + media interface */ static void cxgbe_init(void *); static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgbe_transmit(struct ifnet *, struct mbuf *); static void cxgbe_qflush(struct ifnet *); static int cxgbe_media_change(struct ifnet *); static void cxgbe_media_status(struct ifnet *, struct ifmediareq *); MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services"); /* * Correct lock order when you need to acquire multiple locks is t4_list_lock, * then ADAPTER_LOCK, then t4_uld_list_lock. */ static struct sx t4_list_lock; SLIST_HEAD(, adapter) t4_list; #ifdef TCP_OFFLOAD static struct sx t4_uld_list_lock; SLIST_HEAD(, uld_info) t4_uld_list; #endif /* * Tunables. See tweak_tunables() too. * * Each tunable is set to a default value here if it's known at compile-time. * Otherwise it is set to -n as an indication to tweak_tunables() that it should * provide a reasonable default (upto n) when the driver is loaded. * * Tunables applicable to both T4 and T5 are under hw.cxgbe. Those specific to * T5 are under hw.cxl. */ /* * Number of queues for tx and rx, NIC and offload. 
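[Illustrative aside, not part of this changeset: per the tunable convention described above, a value left at -n at compile time tells tweak_tunables() to derive a reasonable default capped at n. tweak_tunables() itself is not part of this hunk, so the helper below is only a hypothetical sketch of that convention:

	#include <stdio.h>

	// Hypothetical stand-in for the fixup done by tweak_tunables():
	// a positive value was set by the administrator and is used as-is;
	// -n means "derive a sane default, but no more than n".
	static int
	resolve_tunable(int value, int ncpus)
	{
		if (value > 0)
			return (value);
		return (ncpus < -value ? ncpus : -value);
	}

	int
	main(void)
	{
		// With NTXQ 16, t4_ntxq defaults to -16; on an 8-CPU
		// system the driver would end up with 8 NIC txqs.
		printf("ntxq = %d\n", resolve_tunable(-16, 8));
		return (0);
	}

End of aside.]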
*/ #define NTXQ 16 int t4_ntxq = -NTXQ; TUNABLE_INT("hw.cxgbe.ntxq", &t4_ntxq); TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq); /* Old name, undocumented */ #define NRXQ 8 int t4_nrxq = -NRXQ; TUNABLE_INT("hw.cxgbe.nrxq", &t4_nrxq); TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq); /* Old name, undocumented */ #define NTXQ_VI 1 static int t4_ntxq_vi = -NTXQ_VI; TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi); #define NRXQ_VI 1 static int t4_nrxq_vi = -NRXQ_VI; TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi); static int t4_rsrv_noflowq = 0; TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq); #if defined(TCP_OFFLOAD) || defined(RATELIMIT) #define NOFLDTXQ 8 static int t4_nofldtxq = -NOFLDTXQ; TUNABLE_INT("hw.cxgbe.nofldtxq", &t4_nofldtxq); #define NOFLDRXQ 2 static int t4_nofldrxq = -NOFLDRXQ; TUNABLE_INT("hw.cxgbe.nofldrxq", &t4_nofldrxq); #define NOFLDTXQ_VI 1 static int t4_nofldtxq_vi = -NOFLDTXQ_VI; TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi); #define NOFLDRXQ_VI 1 static int t4_nofldrxq_vi = -NOFLDRXQ_VI; TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi); #define TMR_IDX_OFLD 1 int t4_tmr_idx_ofld = TMR_IDX_OFLD; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_ofld", &t4_tmr_idx_ofld); #define PKTC_IDX_OFLD (-1) int t4_pktc_idx_ofld = PKTC_IDX_OFLD; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_ofld", &t4_pktc_idx_ofld); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_keepalive_idle = 0; TUNABLE_ULONG("hw.cxgbe.toe.keepalive_idle", &t4_toe_keepalive_idle); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_keepalive_interval = 0; TUNABLE_ULONG("hw.cxgbe.toe.keepalive_interval", &t4_toe_keepalive_interval); /* 0 means chip/fw default, non-zero number is # of keepalives before abort */ static int t4_toe_keepalive_count = 0; TUNABLE_INT("hw.cxgbe.toe.keepalive_count", &t4_toe_keepalive_count); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_rexmt_min = 0; TUNABLE_ULONG("hw.cxgbe.toe.rexmt_min", &t4_toe_rexmt_min); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_rexmt_max = 0; TUNABLE_ULONG("hw.cxgbe.toe.rexmt_max", &t4_toe_rexmt_max); /* 0 means chip/fw default, non-zero number is # of rexmt before abort */ static int t4_toe_rexmt_count = 0; TUNABLE_INT("hw.cxgbe.toe.rexmt_count", &t4_toe_rexmt_count); /* -1 means chip/fw default, other values are raw backoff values to use */ static int t4_toe_rexmt_backoff[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.0", &t4_toe_rexmt_backoff[0]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.1", &t4_toe_rexmt_backoff[1]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.2", &t4_toe_rexmt_backoff[2]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.3", &t4_toe_rexmt_backoff[3]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.4", &t4_toe_rexmt_backoff[4]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.5", &t4_toe_rexmt_backoff[5]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.6", &t4_toe_rexmt_backoff[6]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.7", &t4_toe_rexmt_backoff[7]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.8", &t4_toe_rexmt_backoff[8]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.9", &t4_toe_rexmt_backoff[9]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.10", &t4_toe_rexmt_backoff[10]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.11", &t4_toe_rexmt_backoff[11]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.12", &t4_toe_rexmt_backoff[12]); 
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.13", &t4_toe_rexmt_backoff[13]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.14", &t4_toe_rexmt_backoff[14]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.15", &t4_toe_rexmt_backoff[15]); #endif #ifdef DEV_NETMAP #define NNMTXQ_VI 2 static int t4_nnmtxq_vi = -NNMTXQ_VI; TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi); #define NNMRXQ_VI 2 static int t4_nnmrxq_vi = -NNMRXQ_VI; TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi); #endif /* * Holdoff parameters for ports. */ #define TMR_IDX 1 int t4_tmr_idx = TMR_IDX; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx", &t4_tmr_idx); TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx); /* Old name */ #define PKTC_IDX (-1) int t4_pktc_idx = PKTC_IDX; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx", &t4_pktc_idx); TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx); /* Old name */ /* * Size (# of entries) of each tx and rx queue. */ unsigned int t4_qsize_txq = TX_EQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq); unsigned int t4_qsize_rxq = RX_IQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq); /* * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively). */ int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types); /* * Configuration file. All the _CF names here are special. */ #define DEFAULT_CF "default" #define BUILTIN_CF "built-in" #define FLASH_CF "flash" #define UWIRE_CF "uwire" #define FPGA_CF "fpga" static char t4_cfg_file[32] = DEFAULT_CF; TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file)); /* * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively). * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them. * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water * mark or when signalled to do so, 0 to never emit PAUSE. */ static int t4_pause_settings = PAUSE_TX | PAUSE_RX; TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings); /* * Forward Error Correction settings (bit 0, 1, 2 = FEC_RS, FEC_BASER_RS, * FEC_RESERVED respectively). * -1 to run with the firmware default. * 0 to disable FEC. */ static int t4_fec = -1; TUNABLE_INT("hw.cxgbe.fec", &t4_fec); /* * Link autonegotiation. * -1 to run with the firmware default. * 0 to disable. * 1 to enable. */ static int t4_autoneg = -1; TUNABLE_INT("hw.cxgbe.autoneg", &t4_autoneg); /* * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed, * encouraged respectively). */ static unsigned int t4_fw_install = 1; TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install); /* * ASIC features that will be used. Disable the ones you don't want so that the * chip resources aren't wasted on features that will not be used. */ static int t4_nbmcaps_allowed = 0; TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed); static int t4_linkcaps_allowed = 0; /* No DCBX, PPP, etc. 
by default */ TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed); static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS | FW_CAPS_CONFIG_SWITCH_EGRESS; TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed); static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC | FW_CAPS_CONFIG_NIC_HASHFILTER | FW_CAPS_CONFIG_NIC_ETHOFLD; TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed); static int t4_toecaps_allowed = -1; TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed); static int t4_rdmacaps_allowed = -1; TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed); static int t4_cryptocaps_allowed = -1; TUNABLE_INT("hw.cxgbe.cryptocaps_allowed", &t4_cryptocaps_allowed); static int t4_iscsicaps_allowed = -1; TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed); static int t4_fcoecaps_allowed = 0; TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); static int t5_write_combine = 0; TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine); static int t4_num_vis = 1; TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis); /* * PCIe Relaxed Ordering. * -1: driver should figure out a good value. * 0: disable RO. * 1: enable RO. * 2: leave RO alone. */ static int pcie_relaxed_ordering = -1; TUNABLE_INT("hw.cxgbe.pcie_relaxed_ordering", &pcie_relaxed_ordering); static int t4_panic_on_fatal_err = 0; TUNABLE_INT("hw.cxgbe.panic_on_fatal_err", &t4_panic_on_fatal_err); #ifdef TCP_OFFLOAD /* * TOE tunables. */ static int t4_cop_managed_offloading = 0; TUNABLE_INT("hw.cxgbe.cop_managed_offloading", &t4_cop_managed_offloading); #endif /* Functions used by VIs to obtain unique MAC addresses for each VI. */ static int vi_mac_funcs[] = { FW_VI_FUNC_ETH, FW_VI_FUNC_OFLD, FW_VI_FUNC_IWARP, FW_VI_FUNC_OPENISCSI, FW_VI_FUNC_OPENFCOE, FW_VI_FUNC_FOISCSI, FW_VI_FUNC_FOFCOE, }; struct intrs_and_queues { uint16_t intr_type; /* INTx, MSI, or MSI-X */ uint16_t num_vis; /* number of VIs for each port */ uint16_t nirq; /* Total # of vectors */ uint16_t ntxq; /* # of NIC txq's for each port */ uint16_t nrxq; /* # of NIC rxq's for each port */ uint16_t nofldtxq; /* # of TOE/ETHOFLD txq's for each port */ uint16_t nofldrxq; /* # of TOE rxq's for each port */ /* The vcxgbe/vcxl interfaces use these and not the ones above. 
*/ uint16_t ntxq_vi; /* # of NIC txq's */ uint16_t nrxq_vi; /* # of NIC rxq's */ uint16_t nofldtxq_vi; /* # of TOE txq's */ uint16_t nofldrxq_vi; /* # of TOE rxq's */ uint16_t nnmtxq_vi; /* # of netmap txq's */ uint16_t nnmrxq_vi; /* # of netmap rxq's */ }; static void setup_memwin(struct adapter *); static void position_memwin(struct adapter *, int, uint32_t); static int validate_mem_range(struct adapter *, uint32_t, int); static int fwmtype_to_hwmtype(int); static int validate_mt_off_len(struct adapter *, int, uint32_t, int, uint32_t *); static int fixup_devlog_params(struct adapter *); static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *); static int prep_firmware(struct adapter *); static int partition_resources(struct adapter *, const struct firmware *, const char *); static int get_params__pre_init(struct adapter *); static int get_params__post_init(struct adapter *); static int set_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *, struct ifmedia *); static void init_l1cfg(struct port_info *); static int apply_l1cfg(struct port_info *); static int cxgbe_init_synchronized(struct vi_info *); static int cxgbe_uninit_synchronized(struct vi_info *); static void quiesce_txq(struct adapter *, struct sge_txq *); static void quiesce_wrq(struct adapter *, struct sge_wrq *); static void quiesce_iq(struct adapter *, struct sge_iq *); static void quiesce_fl(struct adapter *, struct sge_fl *); static int t4_alloc_irq(struct adapter *, struct irq *, int rid, driver_intr_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void vi_refresh_stats(struct adapter *, struct vi_info *); static void cxgbe_refresh_stats(struct adapter *, struct port_info *); static void cxgbe_tick(void *); static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); static void cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS); static int sysctl_btphy(SYSCTL_HANDLER_ARGS); static int sysctl_noflowq(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS); static int sysctl_fec(SYSCTL_HANDLER_ARGS); static int sysctl_autoneg(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); static int sysctl_temperature(SYSCTL_HANDLER_ARGS); static int sysctl_loadavg(SYSCTL_HANDLER_ARGS); static int sysctl_cctrl(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS); static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS); static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_devlog(SYSCTL_HANDLER_ARGS); static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS); static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS); static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS); static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS); static int sysctl_meminfo(SYSCTL_HANDLER_ARGS); static int 
sysctl_mps_tcam(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS); static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS); static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS); static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tids(SYSCTL_HANDLER_ARGS); static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la(SYSCTL_HANDLER_ARGS); static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS); static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS); static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS); static int sysctl_cpus(SYSCTL_HANDLER_ARGS); #ifdef TCP_OFFLOAD static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS); static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS); static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS); static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS); static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS); static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS); #endif static int get_sge_context(struct adapter *, struct t4_sge_context *); static int load_fw(struct adapter *, struct t4_data *); static int load_cfg(struct adapter *, struct t4_data *); static int load_boot(struct adapter *, struct t4_bootrom *); static int load_bootcfg(struct adapter *, struct t4_data *); static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *); static void free_offload_policy(struct t4_offload_policy *); static int set_offload_policy(struct adapter *, struct t4_offload_policy *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, int); #endif static int mod_event(module_t, int, void *); static int notify_siblings(device_t, int); struct { uint16_t device; char *desc; } t4_pciids[] = { {0xa000, "Chelsio Terminator 4 FPGA"}, {0x4400, "Chelsio T440-dbg"}, {0x4401, "Chelsio T420-CR"}, {0x4402, "Chelsio T422-CR"}, {0x4403, "Chelsio T440-CR"}, {0x4404, "Chelsio T420-BCH"}, {0x4405, "Chelsio T440-BCH"}, {0x4406, "Chelsio T440-CH"}, {0x4407, "Chelsio T420-SO"}, {0x4408, "Chelsio T420-CX"}, {0x4409, "Chelsio T420-BT"}, {0x440a, "Chelsio T404-BT"}, {0x440e, "Chelsio T440-LP-CR"}, }, t5_pciids[] = { {0xb000, "Chelsio Terminator 5 FPGA"}, {0x5400, "Chelsio T580-dbg"}, {0x5401, "Chelsio T520-CR"}, /* 2 x 10G */ {0x5402, "Chelsio T522-CR"}, /* 2 x 10G, 2 X 1G */ {0x5403, "Chelsio T540-CR"}, /* 4 x 10G */ {0x5407, "Chelsio T520-SO"}, /* 2 x 10G, nomem */ {0x5409, "Chelsio T520-BT"}, /* 2 x 10GBaseT */ {0x540a, "Chelsio T504-BT"}, /* 4 x 1G */ {0x540d, "Chelsio T580-CR"}, /* 2 x 40G */ {0x540e, "Chelsio T540-LP-CR"}, /* 4 x 10G */ {0x5410, "Chelsio T580-LP-CR"}, /* 2 x 40G */ {0x5411, "Chelsio T520-LL-CR"}, /* 2 x 10G */ {0x5412, "Chelsio T560-CR"}, /* 1 x 40G, 2 x 10G */ {0x5414, "Chelsio T580-LP-SO-CR"}, /* 2 x 40G, nomem */ {0x5415, "Chelsio T502-BT"}, /* 2 x 1G */ {0x5418, "Chelsio T540-BT"}, /* 4 x 10GBaseT */ {0x5419, "Chelsio T540-LP-BT"}, /* 4 x 10GBaseT */ {0x541a, "Chelsio T540-SO-BT"}, /* 4 x 10GBaseT, nomem */ {0x541b, "Chelsio T540-SO-CR"}, /* 4 x 10G, nomem */ }, t6_pciids[] = { {0xc006, "Chelsio Terminator 6 FPGA"}, /* T6 PE10K6 FPGA (PF0) */ {0x6400, "Chelsio T6-DBG-25"}, /* 2 x 10/25G, debug */ {0x6401, "Chelsio T6225-CR"}, /* 2 x 10/25G */ {0x6402, "Chelsio T6225-SO-CR"}, /* 2 x 10/25G, nomem */ {0x6403, 
"Chelsio T6425-CR"}, /* 4 x 10/25G */ {0x6404, "Chelsio T6425-SO-CR"}, /* 4 x 10/25G, nomem */ {0x6405, "Chelsio T6225-OCP-SO"}, /* 2 x 10/25G, nomem */ {0x6406, "Chelsio T62100-OCP-SO"}, /* 2 x 40/50/100G, nomem */ {0x6407, "Chelsio T62100-LP-CR"}, /* 2 x 40/50/100G */ {0x6408, "Chelsio T62100-SO-CR"}, /* 2 x 40/50/100G, nomem */ {0x6409, "Chelsio T6210-BT"}, /* 2 x 10GBASE-T */ {0x640d, "Chelsio T62100-CR"}, /* 2 x 40/50/100G */ {0x6410, "Chelsio T6-DBG-100"}, /* 2 x 40/50/100G, debug */ {0x6411, "Chelsio T6225-LL-CR"}, /* 2 x 10/25G */ {0x6414, "Chelsio T61100-OCP-SO"}, /* 1 x 40/50/100G, nomem */ {0x6415, "Chelsio T6201-BT"}, /* 2 x 1000BASE-T */ /* Custom */ {0x6480, "Custom T6225-CR"}, {0x6481, "Custom T62100-CR"}, {0x6482, "Custom T6225-CR"}, {0x6483, "Custom T62100-CR"}, {0x6484, "Custom T64100-CR"}, {0x6485, "Custom T6240-SO"}, {0x6486, "Custom T6225-SO-CR"}, {0x6487, "Custom T6225-CR"}, }; #ifdef TCP_OFFLOAD /* - * service_iq() has an iq and needs the fl. Offset of fl from the iq should be - * exactly the same for both rxq and ofld_rxq. + * service_iq_fl() has an iq and needs the fl. Offset of fl from the iq should + * be exactly the same for both rxq and ofld_rxq. */ CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq)); CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl)); #endif CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE); static int t4_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xa000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t4_pciids); i++) { if (d == t4_pciids[i].device) { device_set_desc(dev, t4_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t5_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xb000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t5_pciids); i++) { if (d == t5_pciids[i].device) { device_set_desc(dev, t5_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t6_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); for (i = 0; i < nitems(t6_pciids); i++) { if (d == t6_pciids[i].device) { device_set_desc(dev, t6_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static void t5_attribute_workaround(device_t dev) { device_t root_port; uint32_t v; /* * The T5 chips do not properly echo the No Snoop and Relaxed * Ordering attributes when replying to a TLP from a Root * Port. As a workaround, find the parent Root Port and * disable No Snoop and Relaxed Ordering. Note that this * affects all devices under this root port. 
*/ root_port = pci_find_pcie_root_port(dev); if (root_port == NULL) { device_printf(dev, "Unable to find parent root port\n"); return; } v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL, PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2); if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) != 0) device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n", device_get_nameunit(root_port)); } static const struct devnames devnames[] = { { .nexus_name = "t4nex", .ifnet_name = "cxgbe", .vi_ifnet_name = "vcxgbe", .pf03_drv_name = "t4iov", .vf_nexus_name = "t4vf", .vf_ifnet_name = "cxgbev" }, { .nexus_name = "t5nex", .ifnet_name = "cxl", .vi_ifnet_name = "vcxl", .pf03_drv_name = "t5iov", .vf_nexus_name = "t5vf", .vf_ifnet_name = "cxlv" }, { .nexus_name = "t6nex", .ifnet_name = "cc", .vi_ifnet_name = "vcc", .pf03_drv_name = "t6iov", .vf_nexus_name = "t6vf", .vf_ifnet_name = "ccv" } }; void t4_init_devnames(struct adapter *sc) { int id; id = chip_id(sc); if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames)) sc->names = &devnames[id - CHELSIO_T4]; else { device_printf(sc->dev, "chip id %d is not supported.\n", id); sc->names = NULL; } } static int t4_attach(device_t dev) { struct adapter *sc; int rc = 0, i, j, rqidx, tqidx, nports; struct make_dev_args mda; struct intrs_and_queues iaq; struct sge *s; uint32_t *buf; #if defined(TCP_OFFLOAD) || defined(RATELIMIT) int ofld_tqidx; #endif #ifdef TCP_OFFLOAD int ofld_rqidx; #endif #ifdef DEV_NETMAP int nm_rqidx, nm_tqidx; #endif int num_vis; sc = device_get_softc(dev); sc->dev = dev; TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags); if ((pci_get_device(dev) & 0xff00) == 0x5400) t5_attribute_workaround(dev); pci_enable_busmaster(dev); if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { uint32_t v; pci_set_max_read_req(dev, 4096); v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2); sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5); if (pcie_relaxed_ordering == 0 && (v & PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) { v &= ~PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); } else if (pcie_relaxed_ordering == 1 && (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) { v |= PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); } } sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS); sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL); sc->traceq = -1; mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF); snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer", device_get_nameunit(dev)); snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); t4_add_adapter(sc); mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); TAILQ_INIT(&sc->sfl); callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0); mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF); sc->policy = NULL; rw_init(&sc->policy_lock, "connection offload policy"); rc = t4_map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); /* Prepare the adapter for operation. */ buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_prep_adapter(sc, buf); free(buf, M_CXGBE); if (rc != 0) { device_printf(dev, "failed to prepare adapter: %d.\n", rc); goto done; } /* * This is the real PF# to which we're attaching. Works from within PCI * passthrough environments too, where pci_get_function() could return a
We need to * use the real PF# in all our communication with the firmware. */ j = t4_read_reg(sc, A_PL_WHOAMI); sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j); sc->mbox = sc->pf; t4_init_devnames(sc); if (sc->names == NULL) { rc = ENOTSUP; goto done; /* error message displayed already */ } /* * Do this really early, with the memory windows set up even before the * character device. The userland tool's register i/o and mem read * will work even in "recovery mode". */ setup_memwin(sc); if (t4_init_devlog_params(sc, 0) == 0) fixup_devlog_params(sc); make_dev_args_init(&mda); mda.mda_devsw = &t4_cdevsw; mda.mda_uid = UID_ROOT; mda.mda_gid = GID_WHEEL; mda.mda_mode = 0600; mda.mda_si_drv1 = sc; rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev)); if (rc != 0) device_printf(dev, "failed to create nexus char device: %d.\n", rc); /* Go no further if recovery mode has been requested. */ if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) { device_printf(dev, "recovery mode.\n"); goto done; } #if defined(__i386__) if ((cpu_feature & CPUID_CX8) == 0) { device_printf(dev, "64 bit atomics not available.\n"); rc = ENOTSUP; goto done; } #endif /* Prepare the firmware for operation */ rc = prep_firmware(sc); if (rc != 0) goto done; /* error message displayed already */ rc = get_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = set_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_map_bar_2(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_create_dma_tag(sc); if (rc != 0) goto done; /* error message displayed already */ /* * First pass over all the ports - allocate VIs and initialize some * basic parameters like mac address, port type, etc. */ for_each_port(sc, i) { struct port_info *pi; pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK); sc->port[i] = pi; /* These must be set before t4_port_init */ pi->adapter = sc; pi->port_id = i; /* * XXX: vi[0] is special so we can't delay this allocation until * pi->nvi's final value is known. */ pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE, M_ZERO | M_WAITOK); /* * Allocate the "main" VI and initialize parameters * like mac addr. */ rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i); if (rc != 0) { device_printf(dev, "unable to initialize port %d: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", device_get_nameunit(dev), i); mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); sc->chan_map[pi->tx_chan] = i; /* All VIs on this port share this media. */ ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); pi->dev = device_add_child(dev, sc->names->ifnet_name, -1); if (pi->dev == NULL) { device_printf(dev, "failed to add device for port %d.\n", i); rc = ENXIO; goto done; } pi->vi[0].dev = pi->dev; device_set_softc(pi->dev, pi); } /* * Interrupt type, # of interrupts, # of rx/tx queues, etc. 
*/ nports = sc->params.nports; rc = cfg_itype_and_nqueues(sc, &iaq); if (rc != 0) goto done; /* error message displayed already */ num_vis = iaq.num_vis; sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; s = &sc->sge; s->nrxq = nports * iaq.nrxq; s->ntxq = nports * iaq.ntxq; if (num_vis > 1) { s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi; s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi; } s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ s->neq += nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ #if defined(TCP_OFFLOAD) || defined(RATELIMIT) if (is_offload(sc) || is_ethoffload(sc)) { s->nofldtxq = nports * iaq.nofldtxq; if (num_vis > 1) s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi; s->neq += s->nofldtxq; s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); } #endif #ifdef TCP_OFFLOAD if (is_offload(sc)) { s->nofldrxq = nports * iaq.nofldrxq; if (num_vis > 1) s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi; s->neq += s->nofldrxq; /* free list */ s->niq += s->nofldrxq; s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq), M_CXGBE, M_ZERO | M_WAITOK); } #endif #ifdef DEV_NETMAP if (num_vis > 1) { s->nnmrxq = nports * (num_vis - 1) * iaq.nnmrxq_vi; s->nnmtxq = nports * (num_vis - 1) * iaq.nnmtxq_vi; } s->neq += s->nnmtxq + s->nnmrxq; s->niq += s->nnmrxq; s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq), M_CXGBE, M_ZERO | M_WAITOK); #endif s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE, M_ZERO | M_WAITOK); s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE, M_ZERO | M_WAITOK); s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE, M_ZERO | M_WAITOK); sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); t4_init_l2t(sc, M_WAITOK); t4_init_smt(sc, M_WAITOK); t4_init_tx_sched(sc); #ifdef RATELIMIT t4_init_etid_table(sc); #endif /* * Second pass over the ports. This time we know the number of rx and * tx queues that each port should get. */ rqidx = tqidx = 0; #if defined(TCP_OFFLOAD) || defined(RATELIMIT) ofld_tqidx = 0; #endif #ifdef TCP_OFFLOAD ofld_rqidx = 0; #endif #ifdef DEV_NETMAP nm_rqidx = nm_tqidx = 0; #endif for_each_port(sc, i) { struct port_info *pi = sc->port[i]; struct vi_info *vi; if (pi == NULL) continue; pi->nvi = num_vis; for_each_vi(pi, j, vi) { vi->pi = pi; vi->qsize_rxq = t4_qsize_rxq; vi->qsize_txq = t4_qsize_txq; vi->first_rxq = rqidx; vi->first_txq = tqidx; vi->tmr_idx = t4_tmr_idx; vi->pktc_idx = t4_pktc_idx; vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi; vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi; rqidx += vi->nrxq; tqidx += vi->ntxq; if (j == 0 && vi->ntxq > 1) vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0; else vi->rsrv_noflowq = 0; #if defined(TCP_OFFLOAD) || defined(RATELIMIT) vi->first_ofld_txq = ofld_tqidx; vi->nofldtxq = j == 0 ? iaq.nofldtxq : iaq.nofldtxq_vi; ofld_tqidx += vi->nofldtxq; #endif #ifdef TCP_OFFLOAD vi->ofld_tmr_idx = t4_tmr_idx_ofld; vi->ofld_pktc_idx = t4_pktc_idx_ofld; vi->first_ofld_rxq = ofld_rqidx; vi->nofldrxq = j == 0 ? 
iaq.nofldrxq : iaq.nofldrxq_vi; ofld_rqidx += vi->nofldrxq; #endif #ifdef DEV_NETMAP if (j > 0) { vi->first_nm_rxq = nm_rqidx; vi->first_nm_txq = nm_tqidx; vi->nnmrxq = iaq.nnmrxq_vi; vi->nnmtxq = iaq.nnmtxq_vi; nm_rqidx += vi->nnmrxq; nm_tqidx += vi->nnmtxq; } #endif } } rc = t4_setup_intr_handlers(sc); if (rc != 0) { device_printf(dev, "failed to setup interrupt handlers: %d\n", rc); goto done; } rc = bus_generic_probe(dev); if (rc != 0) { device_printf(dev, "failed to probe child drivers: %d\n", rc); goto done; } /* * Ensure thread-safe mailbox access (in debug builds). * * So far this was the only thread accessing the mailbox but various * ifnets and sysctls are about to be created and their handlers/ioctls * will access the mailbox from different threads. */ sc->flags |= CHK_MBOX_ACCESS; rc = bus_generic_attach(dev); if (rc != 0) { device_printf(dev, "failed to attach all child ports: %d\n", rc); goto done; } device_printf(dev, "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n", sc->params.pci.speed, sc->params.pci.width, sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" : (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq); t4_set_desc(sc); notify_siblings(dev, 0); done: if (rc != 0 && sc->cdev) { /* cdev was created and so cxgbetool works; recover that way. */ device_printf(dev, "error during attach, adapter is now in recovery mode.\n"); rc = 0; } if (rc != 0) t4_detach_common(dev); else t4_sysctls(sc); return (rc); } static int t4_ready(device_t dev) { struct adapter *sc; sc = device_get_softc(dev); if (sc->flags & FW_OK) return (0); return (ENXIO); } static int t4_read_port_device(device_t dev, int port, device_t *child) { struct adapter *sc; struct port_info *pi; sc = device_get_softc(dev); if (port < 0 || port >= MAX_NPORTS) return (EINVAL); pi = sc->port[port]; if (pi == NULL || pi->dev == NULL) return (ENXIO); *child = pi->dev; return (0); } static int notify_siblings(device_t dev, int detaching) { device_t sibling; int error, i; error = 0; for (i = 0; i < PCI_FUNCMAX; i++) { if (i == pci_get_function(dev)) continue; sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev), i); if (sibling == NULL || !device_is_attached(sibling)) continue; if (detaching) error = T4_DETACH_CHILD(sibling); else (void)T4_ATTACH_CHILD(sibling); if (error) break; } return (error); } /* * Idempotent */ static int t4_detach(device_t dev) { struct adapter *sc; int rc; sc = device_get_softc(dev); rc = notify_siblings(dev, 1); if (rc) { device_printf(dev, "failed to detach sibling devices: %d\n", rc); return (rc); } return (t4_detach_common(dev)); } int t4_detach_common(device_t dev) { struct adapter *sc; struct port_info *pi; int i, rc; sc = device_get_softc(dev); if (sc->cdev) { destroy_dev(sc->cdev); sc->cdev = NULL; } sc->flags &= ~CHK_MBOX_ACCESS; if (sc->flags & FULL_INIT_DONE) { if (!(sc->flags & IS_VF)) t4_intr_disable(sc); } if (device_is_attached(dev)) { rc = bus_generic_detach(dev); if (rc) { device_printf(dev, "failed to detach child devices: %d\n", rc); return (rc); } } for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); if ((sc->flags & (IS_VF | FW_OK)) == FW_OK) t4_free_tx_sched(sc); for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid); if (pi->dev) device_delete_child(dev, pi->dev); mtx_destroy(&pi->pi_lock); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); } } device_delete_children(dev); if (sc->flags & 
FULL_INIT_DONE) adapter_full_uninit(sc); if ((sc->flags & (IS_VF | FW_OK)) == FW_OK) t4_fw_bye(sc, sc->mbox); if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX) pci_release_msi(dev); if (sc->regs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); if (sc->udbs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->msix_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid, sc->msix_res); if (sc->l2t) t4_free_l2t(sc->l2t); if (sc->smt) t4_free_smt(sc->smt); #ifdef RATELIMIT t4_free_etid_table(sc); #endif #if defined(TCP_OFFLOAD) || defined(RATELIMIT) free(sc->sge.ofld_txq, M_CXGBE); #endif #ifdef TCP_OFFLOAD free(sc->sge.ofld_rxq, M_CXGBE); #endif #ifdef DEV_NETMAP free(sc->sge.nm_rxq, M_CXGBE); free(sc->sge.nm_txq, M_CXGBE); #endif free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); free(sc->tids.hpftid_tab, M_CXGBE); if (sc->tids.hftid_tab) free_hftid_tab(&sc->tids); free(sc->tids.atid_tab, M_CXGBE); free(sc->tids.tid_tab, M_CXGBE); free(sc->tt.tls_rx_ports, M_CXGBE); t4_destroy_dma_tag(sc); if (mtx_initialized(&sc->sc_lock)) { sx_xlock(&t4_list_lock); SLIST_REMOVE(&t4_list, sc, adapter, link); sx_xunlock(&t4_list_lock); mtx_destroy(&sc->sc_lock); } callout_drain(&sc->sfl_callout); if (mtx_initialized(&sc->tids.ftid_lock)) { mtx_destroy(&sc->tids.ftid_lock); cv_destroy(&sc->tids.ftid_cv); } if (mtx_initialized(&sc->tids.atid_lock)) mtx_destroy(&sc->tids.atid_lock); if (mtx_initialized(&sc->sfl_lock)) mtx_destroy(&sc->sfl_lock); if (mtx_initialized(&sc->ifp_lock)) mtx_destroy(&sc->ifp_lock); if (mtx_initialized(&sc->reg_lock)) mtx_destroy(&sc->reg_lock); if (rw_initialized(&sc->policy_lock)) { rw_destroy(&sc->policy_lock); #ifdef TCP_OFFLOAD if (sc->policy != NULL) free_offload_policy(sc->policy); #endif } for (i = 0; i < NUM_MEMWIN; i++) { struct memwin *mw = &sc->memwin[i]; if (rw_initialized(&mw->mw_lock)) rw_destroy(&mw->mw_lock); } bzero(sc, sizeof(*sc)); return (0); } static int cxgbe_probe(device_t dev) { char buf[128]; struct port_info *pi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d", pi->port_id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS) #define T4_CAP_ENABLE (T4_CAP) static int cxgbe_vi_attach(device_t dev, struct vi_info *vi) { struct ifnet *ifp; struct sbuf *sb; vi->xact_addr_filt = -1; callout_init(&vi->tick, 1); /* Allocate an ifnet and set it up */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } vi->ifp = ifp; ifp->if_softc = vi; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = cxgbe_init; ifp->if_ioctl = cxgbe_ioctl; ifp->if_transmit = cxgbe_transmit; ifp->if_qflush = cxgbe_qflush; ifp->if_get_counter = cxgbe_get_counter; #ifdef RATELIMIT ifp->if_snd_tag_alloc = cxgbe_snd_tag_alloc; ifp->if_snd_tag_modify = cxgbe_snd_tag_modify; ifp->if_snd_tag_query = cxgbe_snd_tag_query; ifp->if_snd_tag_free = cxgbe_snd_tag_free; #endif ifp->if_capabilities = T4_CAP; #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) ifp->if_capabilities |= IFCAP_TOE; #endif #ifdef DEV_NETMAP if 
(vi->nnmrxq != 0) ifp->if_capabilities |= IFCAP_NETMAP; #endif #ifdef RATELIMIT if (is_ethoffload(vi->pi->adapter) && vi->nofldtxq != 0) ifp->if_capabilities |= IFCAP_TXRTLMT; #endif ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS; ifp->if_hw_tsomaxsegsize = 65536; vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp, EVENTHANDLER_PRI_ANY); ether_ifattach(ifp, vi->hw_addr); #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) cxgbe_nm_attach(vi); #endif sb = sbuf_new_auto(); sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq); #if defined(TCP_OFFLOAD) || defined(RATELIMIT) switch (ifp->if_capabilities & (IFCAP_TOE | IFCAP_TXRTLMT)) { case IFCAP_TOE: sbuf_printf(sb, "; %d txq (TOE)", vi->nofldtxq); break; case IFCAP_TOE | IFCAP_TXRTLMT: sbuf_printf(sb, "; %d txq (TOE/ETHOFLD)", vi->nofldtxq); break; case IFCAP_TXRTLMT: sbuf_printf(sb, "; %d txq (ETHOFLD)", vi->nofldtxq); break; } #endif #ifdef TCP_OFFLOAD if (ifp->if_capabilities & IFCAP_TOE) sbuf_printf(sb, ", %d rxq (TOE)", vi->nofldrxq); #endif #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) sbuf_printf(sb, "; %d txq, %d rxq (netmap)", vi->nnmtxq, vi->nnmrxq); #endif sbuf_finish(sb); device_printf(dev, "%s\n", sbuf_data(sb)); sbuf_delete(sb); vi_sysctls(vi); return (0); } static int cxgbe_attach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; struct vi_info *vi; int i, rc; callout_init_mtx(&pi->tick, &pi->pi_lock, 0); rc = cxgbe_vi_attach(dev, &pi->vi[0]); if (rc) return (rc); for_each_vi(pi, i, vi) { if (i == 0) continue; vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1); if (vi->dev == NULL) { device_printf(dev, "failed to add VI %d\n", i); continue; } device_set_softc(vi->dev, vi); } cxgbe_sysctls(pi); bus_generic_attach(dev); return (0); } static void cxgbe_vi_detach(struct vi_info *vi) { struct ifnet *ifp = vi->ifp; ether_ifdetach(ifp); if (vi->vlan_c) EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c); /* Let detach proceed even if these fail. */ #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) cxgbe_nm_detach(vi); #endif cxgbe_uninit_synchronized(vi); callout_drain(&vi->tick); vi_full_uninit(vi); if_free(vi->ifp); vi->ifp = NULL; } static int cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; int rc; /* Detach the extra VIs first. 
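[Illustrative aside, not part of this changeset: the TSO limit set in cxgbe_vi_attach() above budgets a 64KB frame and subtracts the Ethernet and 802.1Q headers. The arithmetic, using the standard header sizes:

	#include <stdio.h>

	#define ETHER_HDR_LEN		14	// dst + src + ethertype
	#define ETHER_VLAN_ENCAP_LEN	4	// 802.1Q tag

	int
	main(void)
	{
		// 65536 - (14 + 4) = 65518 bytes of TSO payload budget.
		printf("if_hw_tsomax = %d\n",
		    65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
		return (0);
	}

End of aside.]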
*/ rc = bus_generic_detach(dev); if (rc) return (rc); device_delete_children(dev); doom_vi(sc, &pi->vi[0]); if (pi->flags & HAS_TRACEQ) { sc->traceq = -1; /* cloner should not create ifnet */ t4_tracer_port_detach(sc); } cxgbe_vi_detach(&pi->vi[0]); callout_drain(&pi->tick); ifmedia_removeall(&pi->media); end_synchronized_op(sc, 0); return (0); } static void cxgbe_init(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0) return; cxgbe_init_synchronized(vi); end_synchronized_op(sc, 0); } static int cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) { int rc = 0, mtu, flags; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifreq *ifr = (struct ifreq *)data; uint32_t mask; switch (cmd) { case SIOCSIFMTU: mtu = ifr->ifr_mtu; if (mtu < ETHERMIN || mtu > MAX_MTU) return (EINVAL); rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu"); if (rc) return (rc); ifp->if_mtu = mtu; if (vi->flags & VI_INIT_DONE) { t4_update_fl_bufsize(ifp); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MTU); } end_synchronized_op(sc, 0); break; case SIOCSIFFLAGS: rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4flg"); if (rc) return (rc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = vi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { rc = update_mac_settings(ifp, XGMAC_PROMISC | XGMAC_ALLMULTI); } } else { rc = cxgbe_init_synchronized(vi); } vi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { rc = cxgbe_uninit_synchronized(vi); } end_synchronized_op(sc, 0); break; case SIOCADDMULTI: case SIOCDELMULTI: rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4multi"); if (rc) return (rc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MCADDRS); end_synchronized_op(sc, 0); break; case SIOCSIFCAP: rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap"); if (rc) return (rc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. 
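[Illustrative aside, not part of this changeset: the SIOCSIFCAP handler above XORs the requested capability set against the currently enabled one, so each set bit in mask is a toggle request; dependent capabilities are then policed, e.g. TSO4 is dropped when TXCSUM goes away. A sketch with illustrative (not the real) bit values:

	#include <stdio.h>

	#define IFCAP_TXCSUM	0x0001	// illustrative values only
	#define IFCAP_TSO4	0x0002

	int
	main(void)
	{
		int capenable = IFCAP_TXCSUM | IFCAP_TSO4;
		int reqcap = IFCAP_TSO4;	// request: turn off TXCSUM
		int mask = reqcap ^ capenable;	// == IFCAP_TXCSUM

		if (mask & IFCAP_TXCSUM) {
			capenable ^= IFCAP_TXCSUM;
			if ((capenable & IFCAP_TSO4) &&
			    !(capenable & IFCAP_TXCSUM)) {
				capenable &= ~IFCAP_TSO4;
				printf("tso4 disabled due to -txcsum\n");
			}
		}
		return (0);
	}

End of aside.]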
*/ if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO4; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO6; } if (mask & IFCAP_LRO) { #if defined(INET) || defined(INET6) int i; struct sge_rxq *rxq; ifp->if_capenable ^= IFCAP_LRO; for_each_rxq(vi, i, rxq) { if (ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; else rxq->iq.flags &= ~IQ_LRO_ENABLED; } #endif } #ifdef TCP_OFFLOAD if (mask & IFCAP_TOE) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE; rc = toe_capability(vi, enable); if (rc != 0) goto fail; ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_VLANEX); } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; /* Need to find out how to disable auto-mtu-inflation */ } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef RATELIMIT if (mask & IFCAP_TXRTLMT) ifp->if_capenable ^= IFCAP_TXRTLMT; #endif #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif fail: end_synchronized_op(sc, 0); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: ifmedia_ioctl(ifp, ifr, &pi->media, cmd); break; case SIOCGI2C: { struct ifi2creq i2c; rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (rc != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { rc = EPERM; break; } if (i2c.len > sizeof(i2c.data)) { rc = EINVAL; break; } rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); end_synchronized_op(sc, 0); if (rc == 0) rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } default: rc = ether_ioctl(ifp, cmd, data); } return (rc); } static int cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_txq *txq; void *items[1]; int rc; M_ASSERTPKTHDR(m); MPASS(m->m_nextpkt == NULL); /* not quite ready for this yet */ if (__predict_false(pi->link_cfg.link_ok == 0)) { m_freem(m); return (ENETDOWN); } rc = parse_pkt(sc, &m); if (__predict_false(rc != 0)) { MPASS(m == NULL); /* was freed already */ atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */ return (rc); } #ifdef RATELIMIT if (m->m_pkthdr.snd_tag != NULL) { /* EAGAIN tells the stack we are not the correct interface. */ if (__predict_false(ifp != m->m_pkthdr.snd_tag->ifp)) { m_freem(m); return (EAGAIN); } return (ethofld_transmit(ifp, m)); } #endif /* Select a txq. */ txq = &sc->sge.txq[vi->first_txq]; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) + vi->rsrv_noflowq); items[0] = m; rc = mp_ring_enqueue(txq->r, items, 1, 4096); if (__predict_false(rc != 0)) m_freem(m); return (rc); } static void cxgbe_qflush(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct sge_txq *txq; int i; /* queues do not exist if !VI_INIT_DONE. 
*/ if (vi->flags & VI_INIT_DONE) { for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_QFLUSH; TXQ_UNLOCK(txq); while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("qflush", 1); } TXQ_LOCK(txq); txq->eq.flags &= ~EQ_QFLUSH; TXQ_UNLOCK(txq); } } if_qflush(ifp); } static uint64_t vi_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct fw_vi_stats_vf *s = &vi->stats; vi_refresh_stats(vi->pi->adapter, vi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_bcast_frames + s->rx_mcast_frames + s->rx_ucast_frames); case IFCOUNTER_IERRORS: return (s->rx_err_frames); case IFCOUNTER_OPACKETS: return (s->tx_bcast_frames + s->tx_mcast_frames + s->tx_ucast_frames + s->tx_offload_frames); case IFCOUNTER_OERRORS: return (s->tx_drop_frames); case IFCOUNTER_IBYTES: return (s->rx_bcast_bytes + s->rx_mcast_bytes + s->rx_ucast_bytes); case IFCOUNTER_OBYTES: return (s->tx_bcast_bytes + s->tx_mcast_bytes + s->tx_ucast_bytes + s->tx_offload_bytes); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = 0; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } uint64_t cxgbe_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct port_stats *s = &pi->stats; if (pi->nvi > 1 || sc->flags & IS_VF) return (vi_get_counter(ifp, c)); cxgbe_refresh_stats(sc, pi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_frames); case IFCOUNTER_IERRORS: return (s->rx_jabber + s->rx_runt + s->rx_too_long + s->rx_fcs_err + s->rx_len_err); case IFCOUNTER_OPACKETS: return (s->tx_frames); case IFCOUNTER_OERRORS: return (s->tx_error_frames); case IFCOUNTER_IBYTES: return (s->rx_octets); case IFCOUNTER_OBYTES: return (s->tx_octets); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_IQDROPS: return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 + s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 + s->rx_trunc3 + pi->tnl_cong_drops); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = s->tx_drop; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } /* * The kernel picks a media from the list we had provided so we do not have to * validate the request. 
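 *
 * As a reminder of the encoding (see net/if_media.h), an ifmedia word
 * packs the type, subtype, and option bits into one integer, e.g. a
 * fixed 10G-SR selection with rx pause (illustrative):
 *
 *	int mword = IFM_ETHER | IFM_FDX | IFM_10G_SR | IFM_ETH_RXPAUSE;
 *
 *	IFM_TYPE(mword) == IFM_ETHER
 *	IFM_SUBTYPE(mword) == IFM_10G_SR
 *	(IFM_OPTIONS(mword) & IFM_ETH_RXPAUSE) != 0
 *
 * which is why the handler below recovers the user's request with
 * nothing more than IFM_SUBTYPE() and IFM_OPTIONS().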
*/ static int cxgbe_media_change(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct ifmedia *ifm = &pi->media; struct link_config *lc = &pi->link_cfg; struct adapter *sc = pi->adapter; int rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mec"); if (rc != 0) return (rc); PORT_LOCK(pi); if (IFM_SUBTYPE(ifm->ifm_media) == IFM_AUTO) { MPASS(lc->supported & FW_PORT_CAP_ANEG); lc->requested_aneg = AUTONEG_ENABLE; } else { lc->requested_aneg = AUTONEG_DISABLE; lc->requested_speed = ifmedia_baudrate(ifm->ifm_media) / 1000000; lc->requested_fc = 0; if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE) lc->requested_fc |= PAUSE_RX; if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE) lc->requested_fc |= PAUSE_TX; } if (pi->up_vis > 0) rc = apply_l1cfg(pi); PORT_UNLOCK(pi); end_synchronized_op(sc, 0); return (rc); } /* * Mbps to FW_PORT_CAP_SPEED_* bit. */ static uint16_t speed_to_fwspeed(int speed) { switch (speed) { case 100000: return (FW_PORT_CAP_SPEED_100G); case 40000: return (FW_PORT_CAP_SPEED_40G); case 25000: return (FW_PORT_CAP_SPEED_25G); case 10000: return (FW_PORT_CAP_SPEED_10G); case 1000: return (FW_PORT_CAP_SPEED_1G); case 100: return (FW_PORT_CAP_SPEED_100M); } return (0); } /* * Base media word (without ETHER, pause, link active, etc.) for the port at the * given speed. */ static int port_mword(struct port_info *pi, uint16_t speed) { MPASS(speed & M_FW_PORT_CAP_SPEED); MPASS(powerof2(speed)); switch(pi->port_type) { case FW_PORT_TYPE_BT_SGMII: case FW_PORT_TYPE_BT_XFI: case FW_PORT_TYPE_BT_XAUI: /* BaseT */ switch (speed) { case FW_PORT_CAP_SPEED_100M: return (IFM_100_T); case FW_PORT_CAP_SPEED_1G: return (IFM_1000_T); case FW_PORT_CAP_SPEED_10G: return (IFM_10G_T); } break; case FW_PORT_TYPE_KX4: if (speed == FW_PORT_CAP_SPEED_10G) return (IFM_10G_KX4); break; case FW_PORT_TYPE_CX4: if (speed == FW_PORT_CAP_SPEED_10G) return (IFM_10G_CX4); break; case FW_PORT_TYPE_KX: if (speed == FW_PORT_CAP_SPEED_1G) return (IFM_1000_KX); break; case FW_PORT_TYPE_KR: case FW_PORT_TYPE_BP_AP: case FW_PORT_TYPE_BP4_AP: case FW_PORT_TYPE_BP40_BA: case FW_PORT_TYPE_KR4_100G: case FW_PORT_TYPE_KR_SFP28: case FW_PORT_TYPE_KR_XLAUI: switch (speed) { case FW_PORT_CAP_SPEED_1G: return (IFM_1000_KX); case FW_PORT_CAP_SPEED_10G: return (IFM_10G_KR); case FW_PORT_CAP_SPEED_25G: return (IFM_25G_KR); case FW_PORT_CAP_SPEED_40G: return (IFM_40G_KR4); case FW_PORT_CAP_SPEED_100G: return (IFM_100G_KR4); } break; case FW_PORT_TYPE_FIBER_XFI: case FW_PORT_TYPE_FIBER_XAUI: case FW_PORT_TYPE_SFP: case FW_PORT_TYPE_QSFP_10G: case FW_PORT_TYPE_QSA: case FW_PORT_TYPE_QSFP: case FW_PORT_TYPE_CR4_QSFP: case FW_PORT_TYPE_CR_QSFP: case FW_PORT_TYPE_CR2_QSFP: case FW_PORT_TYPE_SFP28: /* Pluggable transceiver */ switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: switch (speed) { case FW_PORT_CAP_SPEED_1G: return (IFM_1000_LX); case FW_PORT_CAP_SPEED_10G: return (IFM_10G_LR); case FW_PORT_CAP_SPEED_25G: return (IFM_25G_LR); case FW_PORT_CAP_SPEED_40G: return (IFM_40G_LR4); case FW_PORT_CAP_SPEED_100G: return (IFM_100G_LR4); } break; case FW_PORT_MOD_TYPE_SR: switch (speed) { case FW_PORT_CAP_SPEED_1G: return (IFM_1000_SX); case FW_PORT_CAP_SPEED_10G: return (IFM_10G_SR); case FW_PORT_CAP_SPEED_25G: return (IFM_25G_SR); case FW_PORT_CAP_SPEED_40G: return (IFM_40G_SR4); case FW_PORT_CAP_SPEED_100G: return (IFM_100G_SR4); } break; case FW_PORT_MOD_TYPE_ER: if (speed == FW_PORT_CAP_SPEED_10G) return (IFM_10G_ER); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case 
FW_PORT_MOD_TYPE_TWINAX_ACTIVE: switch (speed) { case FW_PORT_CAP_SPEED_1G: return (IFM_1000_CX); case FW_PORT_CAP_SPEED_10G: return (IFM_10G_TWINAX); case FW_PORT_CAP_SPEED_25G: return (IFM_25G_CR); case FW_PORT_CAP_SPEED_40G: return (IFM_40G_CR4); case FW_PORT_CAP_SPEED_100G: return (IFM_100G_CR4); } break; case FW_PORT_MOD_TYPE_LRM: if (speed == FW_PORT_CAP_SPEED_10G) return (IFM_10G_LRM); break; case FW_PORT_MOD_TYPE_NA: MPASS(0); /* Not pluggable? */ /* fall through */ case FW_PORT_MOD_TYPE_ERROR: case FW_PORT_MOD_TYPE_UNKNOWN: case FW_PORT_MOD_TYPE_NOTSUPPORTED: break; case FW_PORT_MOD_TYPE_NONE: return (IFM_NONE); } break; case FW_PORT_TYPE_NONE: return (IFM_NONE); } return (IFM_UNKNOWN); } static void cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4med") != 0) return; PORT_LOCK(pi); if (pi->up_vis == 0) { /* * If all the interfaces are administratively down the firmware * does not report transceiver changes. Refresh port info here * so that ifconfig displays accurate ifmedia at all times. * This is the only reason we have a synchronized op in this * function. Just PORT_LOCK would have been enough otherwise. */ t4_update_port_info(pi); build_medialist(pi, &pi->media); } /* ifm_status */ ifmr->ifm_status = IFM_AVALID; if (lc->link_ok == 0) goto done; ifmr->ifm_status |= IFM_ACTIVE; /* ifm_active */ ifmr->ifm_active = IFM_ETHER | IFM_FDX; ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE); if (lc->fc & PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (lc->fc & PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; ifmr->ifm_active |= port_mword(pi, speed_to_fwspeed(lc->speed)); done: PORT_UNLOCK(pi); end_synchronized_op(sc, 0); } static int vcxgbe_probe(device_t dev) { char buf[128]; struct vi_info *vi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id, vi - vi->pi->vi); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } static int alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi) { int func, index, rc; uint32_t param, val; ASSERT_SYNCHRONIZED_OP(sc); index = vi - pi->vi; MPASS(index > 0); /* This function deals with _extra_ VIs only */ KASSERT(index < nitems(vi_mac_funcs), ("%s: VI %s doesn't have a MAC func", __func__, device_get_nameunit(vi->dev))); func = vi_mac_funcs[index]; rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1, vi->hw_addr, &vi->rss_size, func, 0); if (rc < 0) { device_printf(vi->dev, "failed to allocate virtual interface %d " "for port %d: %d\n", index, pi->port_id, -rc); return (-rc); } vi->viid = rc; if (chip_id(sc) <= CHELSIO_T5) vi->smt_idx = (rc & 0x7f) << 1; else vi->smt_idx = (rc & 0x7f); if (vi->rss_size == 1) { /* * This VI didn't get a slice of the RSS table. Reduce the * number of VIs being created (hw.cxgbe.num_vis) or modify the * configuration file (nvi, rssnvi for this PF) if this is a * problem.
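 *
 * For example (loader.conf, assuming the stock tunable name):
 *
 *	hw.cxgbe.num_vis="2"
 *
 * requests one extra VI per port, and every VI past the first needs
 * its own slice of the RSS table provisioned as described above.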
*/ device_printf(vi->dev, "RSS table not available.\n"); vi->rss_base = 0xffff; return (0); } param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) | V_FW_PARAMS_PARAM_YZ(vi->viid); rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); if (rc) vi->rss_base = 0xffff; else { MPASS((val >> 16) == vi->rss_size); vi->rss_base = val & 0xffff; } return (0); } static int vcxgbe_attach(device_t dev) { struct vi_info *vi; struct port_info *pi; struct adapter *sc; int rc; vi = device_get_softc(dev); pi = vi->pi; sc = pi->adapter; rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via"); if (rc) return (rc); rc = alloc_extra_vi(sc, pi, vi); end_synchronized_op(sc, 0); if (rc) return (rc); rc = cxgbe_vi_attach(dev, vi); if (rc) { t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); return (rc); } return (0); } static int vcxgbe_detach(device_t dev) { struct vi_info *vi; struct adapter *sc; vi = device_get_softc(dev); sc = vi->pi->adapter; doom_vi(sc, vi); cxgbe_vi_detach(vi); t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); end_synchronized_op(sc, 0); return (0); } void t4_fatal_err(struct adapter *sc) { t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0); t4_intr_disable(sc); log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n", device_get_nameunit(sc->dev)); if (t4_panic_on_fatal_err) panic("panic requested on fatal error"); } void t4_add_adapter(struct adapter *sc) { sx_xlock(&t4_list_lock); SLIST_INSERT_HEAD(&t4_list, sc, link); sx_xunlock(&t4_list_lock); } int t4_map_bars_0_and_4(struct adapter *sc) { sc->regs_rid = PCIR_BAR(0); sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE); if (sc->regs_res == NULL) { device_printf(sc->dev, "cannot map registers.\n"); return (ENXIO); } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); setbit(&sc->doorbells, DOORBELL_KDB); sc->msix_rid = PCIR_BAR(4); sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->msix_rid, RF_ACTIVE); if (sc->msix_res == NULL) { device_printf(sc->dev, "cannot map MSI-X BAR.\n"); return (ENXIO); } return (0); } int t4_map_bar_2(struct adapter *sc) { /* * T4: only iWARP driver uses the userspace doorbells. There is no need * to map it if RDMA is disabled. */ if (is_t4(sc) && sc->rdmacaps == 0) return (0); sc->udbs_rid = PCIR_BAR(2); sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->udbs_rid, RF_ACTIVE); if (sc->udbs_res == NULL) { device_printf(sc->dev, "cannot map doorbell BAR.\n"); return (ENXIO); } sc->udbs_base = rman_get_virtual(sc->udbs_res); if (chip_id(sc) >= CHELSIO_T5) { setbit(&sc->doorbells, DOORBELL_UDB); #if defined(__i386__) || defined(__amd64__) if (t5_write_combine) { int rc, mode; /* * Enable write combining on BAR2. This is the * userspace doorbell BAR and is split into 128B * (UDBS_SEG_SIZE) doorbell regions, each associated * with an egress queue. The first 64B has the doorbell * and the second 64B can be used to submit a tx work * request with an implicit doorbell. */ rc = pmap_change_attr((vm_offset_t)sc->udbs_base, rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING); if (rc == 0) { clrbit(&sc->doorbells, DOORBELL_UDB); setbit(&sc->doorbells, DOORBELL_WCWR); setbit(&sc->doorbells, DOORBELL_UDBWC); } else { device_printf(sc->dev, "couldn't enable write combining: %d\n", rc); } mode = is_t5(sc) ?
V_STATMODE(0) : V_T6_STATMODE(0); t4_write_reg(sc, A_SGE_STAT_CFG, V_STATSOURCE_T5(7) | mode); } #endif } sc->iwt.wc_en = isset(&sc->doorbells, DOORBELL_UDBWC) ? 1 : 0; return (0); } struct memwin_init { uint32_t base; uint32_t aperture; }; static const struct memwin_init t4_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 } }; static const struct memwin_init t5_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 }, }; static void setup_memwin(struct adapter *sc) { const struct memwin_init *mw_init; struct memwin *mw; int i; uint32_t bar0; if (is_t4(sc)) { /* * Read low 32b of bar0 indirectly via the hardware backdoor * mechanism. Works from within PCI passthrough environments * too, where rman_get_start() can return a different value. We * need to program the T4 memory window decoders with the actual * addresses that will be coming across the PCIe link. */ bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0)); bar0 &= (uint32_t) PCIM_BAR_MEM_BASE; mw_init = &t4_memwin[0]; } else { /* T5+ use the relative offset inside the PCIe BAR */ bar0 = 0; mw_init = &t5_memwin[0]; } for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) { rw_init(&mw->mw_lock, "memory window access"); mw->mw_base = mw_init->base; mw->mw_aperture = mw_init->aperture; mw->mw_curpos = 0; t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i), (mw->mw_base + bar0) | V_BIR(0) | V_WINDOW(ilog2(mw->mw_aperture) - 10)); rw_wlock(&mw->mw_lock); position_memwin(sc, i, 0); rw_wunlock(&mw->mw_lock); } /* flush */ t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2)); } /* * Positions the memory window at the given address in the card's address space. * There are some alignment requirements and the actual position may be at an * address prior to the requested address. mw->mw_curpos always has the actual * position of the window. 
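 *
 * A minimal sketch of the arithmetic (T5+ shown; windows must start
 * on a 128B boundary, so the window is backed up to cover the
 * requested address):
 *
 *	uint32_t addr = 0x12345;
 *	uint32_t curpos = addr & ~0x7f;		(= 0x12300)
 *	uint32_t off_in_win = addr - curpos;	(= 0x45)
 *
 * and the actual register access in rw_via_memwin() below goes to
 * mw_base + off_in_win.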
*/ static void position_memwin(struct adapter *sc, int idx, uint32_t addr) { struct memwin *mw; uint32_t pf; uint32_t reg; MPASS(idx >= 0 && idx < NUM_MEMWIN); mw = &sc->memwin[idx]; rw_assert(&mw->mw_lock, RA_WLOCKED); if (is_t4(sc)) { pf = 0; mw->mw_curpos = addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); mw->mw_curpos = addr & ~0x7f; /* start must be 128B aligned */ } reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx); t4_write_reg(sc, reg, mw->mw_curpos | pf); t4_read_reg(sc, reg); /* flush */ } int rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len, int rw) { struct memwin *mw; uint32_t mw_end, v; MPASS(idx >= 0 && idx < NUM_MEMWIN); /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); mw = &sc->memwin[idx]; while (len > 0) { rw_rlock(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; if (addr >= mw_end || addr < mw->mw_curpos) { /* Will need to reposition the window */ if (!rw_try_upgrade(&mw->mw_lock)) { rw_runlock(&mw->mw_lock); rw_wlock(&mw->mw_lock); } rw_assert(&mw->mw_lock, RA_WLOCKED); position_memwin(sc, idx, addr); rw_downgrade(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; } rw_assert(&mw->mw_lock, RA_RLOCKED); while (addr < mw_end && len > 0) { if (rw == 0) { v = t4_read_reg(sc, mw->mw_base + addr - mw->mw_curpos); *val++ = le32toh(v); } else { v = *val++; t4_write_reg(sc, mw->mw_base + addr - mw->mw_curpos, htole32(v)); } addr += 4; len -= 4; } rw_runlock(&mw->mw_lock); } return (0); } int alloc_atid_tab(struct tid_info *t, int flags) { int i; MPASS(t->natids > 0); MPASS(t->atid_tab == NULL); t->atid_tab = malloc(t->natids * sizeof(*t->atid_tab), M_CXGBE, M_ZERO | flags); if (t->atid_tab == NULL) return (ENOMEM); mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF); t->afree = t->atid_tab; t->atids_in_use = 0; for (i = 1; i < t->natids; i++) t->atid_tab[i - 1].next = &t->atid_tab[i]; t->atid_tab[t->natids - 1].next = NULL; return (0); } void free_atid_tab(struct tid_info *t) { KASSERT(t->atids_in_use == 0, ("%s: %d atids still in use.", __func__, t->atids_in_use)); if (mtx_initialized(&t->atid_lock)) mtx_destroy(&t->atid_lock); free(t->atid_tab, M_CXGBE); t->atid_tab = NULL; } int alloc_atid(struct adapter *sc, void *ctx) { struct tid_info *t = &sc->tids; int atid = -1; mtx_lock(&t->atid_lock); if (t->afree) { union aopen_entry *p = t->afree; atid = p - t->atid_tab; MPASS(atid <= M_TID_TID); t->afree = p->next; p->data = ctx; t->atids_in_use++; } mtx_unlock(&t->atid_lock); return (atid); } void * lookup_atid(struct adapter *sc, int atid) { struct tid_info *t = &sc->tids; return (t->atid_tab[atid].data); } void free_atid(struct adapter *sc, int atid) { struct tid_info *t = &sc->tids; union aopen_entry *p = &t->atid_tab[atid]; mtx_lock(&t->atid_lock); p->next = t->afree; t->afree = p; t->atids_in_use--; mtx_unlock(&t->atid_lock); } static void queue_tid_release(struct adapter *sc, int tid) { CXGBE_UNIMPLEMENTED("deferred tid release"); } void release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq) { struct wrqe *wr; struct cpl_tid_release *req; wr = alloc_wrqe(sizeof(*req), ctrlq); if (wr == NULL) { queue_tid_release(sc, tid); /* defer */ return; } req = wrtod(wr); INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid); t4_wrq_tx(sc, wr); } static int t4_range_cmp(const void *a, const void *b) { return ((const struct t4_range *)a)->start - ((const struct t4_range *)b)->start; } /* * Verify that the memory range specified by the 
addr/len pair is valid within * the card's address space. */ static int validate_mem_range(struct adapter *sc, uint32_t addr, int len) { struct t4_range mem_ranges[4], *r, *next; uint32_t em, addr_len; int i, n, remaining; /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); /* Enabled memories */ em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); r = &mem_ranges[0]; n = 0; bzero(r, sizeof(mem_ranges)); if (em & F_EDRAM0_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); r->size = G_EDRAM0_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM0_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EDRAM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); r->size = G_EDRAM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EXT_MEM_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); r->size = G_EXT_MEM_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); r->size = G_EXT_MEM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } MPASS(n <= nitems(mem_ranges)); if (n > 1) { /* Sort and merge the ranges. */ qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp); /* Start from index 0 and examine the next n - 1 entries. */ r = &mem_ranges[0]; for (remaining = n - 1; remaining > 0; remaining--, r++) { MPASS(r->size > 0); /* r is a valid entry. */ next = r + 1; MPASS(next->size > 0); /* and so is the next one. */ while (r->start + r->size >= next->start) { /* Merge the next one into the current entry. */ r->size = max(r->start + r->size, next->start + next->size) - r->start; n--; /* One fewer entry in total. */ if (--remaining == 0) goto done; /* short circuit */ next++; } if (next != r + 1) { /* * Some entries were merged into r and next * points to the first valid entry that couldn't * be merged. */ MPASS(next->size > 0); /* must be valid */ memcpy(r + 1, next, remaining * sizeof(*r)); #ifdef INVARIANTS /* * This is so that the foo->size assertion in the * next iteration of the loop does the right * thing for entries that were pulled up and are * no longer valid. */ MPASS(n < nitems(mem_ranges)); bzero(&mem_ranges[n], (nitems(mem_ranges) - n) * sizeof(struct t4_range)); #endif } } done: /* Done merging the ranges. */ MPASS(n > 0); r = &mem_ranges[0]; for (i = 0; i < n; i++, r++) { if (addr >= r->start && addr + len <= r->start + r->size) return (0); } } return (EFAULT); } static int fwmtype_to_hwmtype(int mtype) { switch (mtype) { case FW_MEMTYPE_EDC0: return (MEM_EDC0); case FW_MEMTYPE_EDC1: return (MEM_EDC1); case FW_MEMTYPE_EXTMEM: return (MEM_MC0); case FW_MEMTYPE_EXTMEM1: return (MEM_MC1); default: panic("%s: cannot translate fw mtype %d.", __func__, mtype); } } /* * Verify that the memory range specified by the memtype/offset/len pair is * valid and lies entirely within the memtype specified. The global address of * the start of the range is returned in addr.
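 *
 * Worked example with illustrative values: if the EDC1 BAR reports a
 * base of 0x40 (in MB), then memtype EDC1 with off 0x1000 maps to
 *
 *	maddr = 0x40 << 20;		(= 0x04000000)
 *	addr  = maddr + 0x1000;		(= 0x04001000)
 *
 * and that global address is then checked by validate_mem_range()
 * above.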
*/ static int validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len, uint32_t *addr) { uint32_t em, addr_len, maddr; /* Memory can only be accessed in naturally aligned 4 byte units */ if (off & 3 || len & 3 || len == 0) return (EINVAL); em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); switch (fwmtype_to_hwmtype(mtype)) { case MEM_EDC0: if (!(em & F_EDRAM0_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); maddr = G_EDRAM0_BASE(addr_len) << 20; break; case MEM_EDC1: if (!(em & F_EDRAM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); maddr = G_EDRAM1_BASE(addr_len) << 20; break; case MEM_MC: if (!(em & F_EXT_MEM_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); maddr = G_EXT_MEM_BASE(addr_len) << 20; break; case MEM_MC1: if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); maddr = G_EXT_MEM1_BASE(addr_len) << 20; break; default: return (EINVAL); } *addr = maddr + off; /* global address */ return (validate_mem_range(sc, *addr, len)); } static int fixup_devlog_params(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; int rc; rc = validate_mt_off_len(sc, dparams->memtype, dparams->start, dparams->size, &dparams->addr); return (rc); } static void update_nirq(struct intrs_and_queues *iaq, int nports) { int extra = T4_EXTRA_INTR; iaq->nirq = extra; iaq->nirq += nports * (iaq->nrxq + iaq->nofldrxq); iaq->nirq += nports * (iaq->num_vis - 1) * max(iaq->nrxq_vi, iaq->nnmrxq_vi); iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi; } /* * Adjust requirements to fit the number of interrupts available. */ static void calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype, int navail) { int old_nirq; const int nports = sc->params.nports; MPASS(nports > 0); MPASS(navail > 0); bzero(iaq, sizeof(*iaq)); iaq->intr_type = itype; iaq->num_vis = t4_num_vis; iaq->ntxq = t4_ntxq; iaq->ntxq_vi = t4_ntxq_vi; iaq->nrxq = t4_nrxq; iaq->nrxq_vi = t4_nrxq_vi; #if defined(TCP_OFFLOAD) || defined(RATELIMIT) if (is_offload(sc) || is_ethoffload(sc)) { iaq->nofldtxq = t4_nofldtxq; iaq->nofldtxq_vi = t4_nofldtxq_vi; } #endif #ifdef TCP_OFFLOAD if (is_offload(sc)) { iaq->nofldrxq = t4_nofldrxq; iaq->nofldrxq_vi = t4_nofldrxq_vi; } #endif #ifdef DEV_NETMAP iaq->nnmtxq_vi = t4_nnmtxq_vi; iaq->nnmrxq_vi = t4_nnmrxq_vi; #endif update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { /* * This is the normal case -- there are enough interrupts for * everything. */ goto done; } /* * If extra VIs have been configured try reducing their count and see if * that works. */ while (iaq->num_vis > 1) { iaq->num_vis--; update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { device_printf(sc->dev, "virtual interfaces per port " "reduced to %d from %d. nrxq=%u, nofldrxq=%u, " "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u. " "itype %d, navail %u, nirq %d.\n", iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, itype, navail, iaq->nirq); goto done; } } /* * Extra VIs will not be created. Log a message if they were requested. */ MPASS(iaq->num_vis == 1); iaq->ntxq_vi = iaq->nrxq_vi = 0; iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0; iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0; if (iaq->num_vis != t4_num_vis) { device_printf(sc->dev, "extra virtual interfaces disabled. " "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, " "nnmrxq_vi=%u. 
itype %d, navail %u, nirq %d.\n", iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, itype, navail, iaq->nirq); } /* * Keep reducing the number of NIC rx queues to the next lower power of * 2 (for even RSS distribution) and halving the TOE rx queues and see * if that works. */ do { if (iaq->nrxq > 1) { do { iaq->nrxq--; } while (!powerof2(iaq->nrxq)); } if (iaq->nofldrxq > 1) iaq->nofldrxq >>= 1; old_nirq = iaq->nirq; update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { device_printf(sc->dev, "running with reduced number of " "rx queues because of shortage of interrupts. " "nrxq=%u, nofldrxq=%u. " "itype %d, navail %u, nirq %d.\n", iaq->nrxq, iaq->nofldrxq, itype, navail, iaq->nirq); goto done; } } while (old_nirq != iaq->nirq); /* One interrupt for everything. Ugh. */ device_printf(sc->dev, "running with minimal number of queues. " "itype %d, navail %u.\n", itype, navail); iaq->nirq = 1; MPASS(iaq->nrxq == 1); iaq->ntxq = 1; if (iaq->nofldrxq > 1) iaq->nofldtxq = 1; done: MPASS(iaq->num_vis > 0); if (iaq->num_vis > 1) { MPASS(iaq->nrxq_vi > 0); MPASS(iaq->ntxq_vi > 0); } MPASS(iaq->nirq > 0); MPASS(iaq->nrxq > 0); MPASS(iaq->ntxq > 0); if (itype == INTR_MSI) { MPASS(powerof2(iaq->nirq)); } } static int cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq) { int rc, itype, navail, nalloc; for (itype = INTR_MSIX; itype; itype >>= 1) { if ((itype & t4_intr_types) == 0) continue; /* not allowed */ if (itype == INTR_MSIX) navail = pci_msix_count(sc->dev); else if (itype == INTR_MSI) navail = pci_msi_count(sc->dev); else navail = 1; restart: if (navail == 0) continue; calculate_iaq(sc, iaq, itype, navail); nalloc = iaq->nirq; rc = 0; if (itype == INTR_MSIX) rc = pci_alloc_msix(sc->dev, &nalloc); else if (itype == INTR_MSI) rc = pci_alloc_msi(sc->dev, &nalloc); if (rc == 0 && nalloc > 0) { if (nalloc == iaq->nirq) return (0); /* * Didn't get the number requested. Use whatever number * the kernel is willing to allocate. */ device_printf(sc->dev, "fewer vectors than requested, " "type=%d, req=%d, rcvd=%d; will downshift req.\n", itype, iaq->nirq, nalloc); pci_release_msi(sc->dev); navail = nalloc; goto restart; } device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", itype, rc, iaq->nirq, nalloc); } device_printf(sc->dev, "failed to find a usable interrupt type. 
" "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types, pci_msix_count(sc->dev), pci_msi_count(sc->dev)); return (ENXIO); } #define FW_VERSION(chip) ( \ V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \ V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \ V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \ V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD)) #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf) struct fw_info { uint8_t chip; char *kld_name; char *fw_mod_name; struct fw_hdr fw_hdr; /* XXX: waste of space, need a sparse struct */ } fw_info[] = { { .chip = CHELSIO_T4, .kld_name = "t4fw_cfg", .fw_mod_name = "t4fw", .fw_hdr = { .chip = FW_HDR_CHIP_T4, .fw_ver = htobe32(FW_VERSION(T4)), .intfver_nic = FW_INTFVER(T4, NIC), .intfver_vnic = FW_INTFVER(T4, VNIC), .intfver_ofld = FW_INTFVER(T4, OFLD), .intfver_ri = FW_INTFVER(T4, RI), .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T4, ISCSI), .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU), .intfver_fcoe = FW_INTFVER(T4, FCOE), }, }, { .chip = CHELSIO_T5, .kld_name = "t5fw_cfg", .fw_mod_name = "t5fw", .fw_hdr = { .chip = FW_HDR_CHIP_T5, .fw_ver = htobe32(FW_VERSION(T5)), .intfver_nic = FW_INTFVER(T5, NIC), .intfver_vnic = FW_INTFVER(T5, VNIC), .intfver_ofld = FW_INTFVER(T5, OFLD), .intfver_ri = FW_INTFVER(T5, RI), .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T5, ISCSI), .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU), .intfver_fcoe = FW_INTFVER(T5, FCOE), }, }, { .chip = CHELSIO_T6, .kld_name = "t6fw_cfg", .fw_mod_name = "t6fw", .fw_hdr = { .chip = FW_HDR_CHIP_T6, .fw_ver = htobe32(FW_VERSION(T6)), .intfver_nic = FW_INTFVER(T6, NIC), .intfver_vnic = FW_INTFVER(T6, VNIC), .intfver_ofld = FW_INTFVER(T6, OFLD), .intfver_ri = FW_INTFVER(T6, RI), .intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T6, ISCSI), .intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU), .intfver_fcoe = FW_INTFVER(T6, FCOE), }, } }; static struct fw_info * find_fw_info(int chip) { int i; for (i = 0; i < nitems(fw_info); i++) { if (fw_info[i].chip == chip) return (&fw_info[i]); } return (NULL); } /* * Is the given firmware API compatible with the one the driver was compiled * with? */ static int fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2) { /* short circuit if it's the exact same firmware version */ if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver) return (1); /* * XXX: Is this too conservative? Perhaps I should limit this to the * features that are supported in the driver. */ #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x) if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) && SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) && SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe)) return (1); #undef SAME_INTF return (0); } /* * The firmware in the KLD is usable, but should it be installed? This routine * explains itself in detail if it indicates the KLD firmware should be * installed. 
*/ static int should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c) { const char *reason; if (!card_fw_usable) { reason = "incompatible or unusable"; goto install; } if (k > c) { reason = "older than the version bundled with this driver"; goto install; } if (t4_fw_install == 2 && k != c) { reason = "different than the version bundled with this driver"; goto install; } return (0); install: if (t4_fw_install == 0) { device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "but the driver is prohibited from installing a different " "firmware on the card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason); return (0); } device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "installing firmware %u.%u.%u.%u on card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason, G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); return (1); } /* * Establish contact with the firmware and determine if we are the master driver * or not, and whether we are responsible for chip initialization. */ static int prep_firmware(struct adapter *sc) { const struct firmware *fw = NULL, *default_cfg; int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1; enum dev_state state; struct fw_info *fw_info; struct fw_hdr *card_fw; /* fw on the card */ const struct fw_hdr *kld_fw; /* fw in the KLD */ const struct fw_hdr *drv_fw; /* fw header the driver was compiled against */ /* This is the firmware whose headers the driver was compiled against */ fw_info = find_fw_info(chip_id(sc)); if (fw_info == NULL) { device_printf(sc->dev, "unable to look up firmware information for chip %d.\n", chip_id(sc)); return (EINVAL); } drv_fw = &fw_info->fw_hdr; /* * The firmware KLD contains many modules. The KLD name is also the * name of the module that contains the default config file. */ default_cfg = firmware_get(fw_info->kld_name); /* This is the firmware in the KLD */ fw = firmware_get(fw_info->fw_mod_name); if (fw != NULL) { kld_fw = (const void *)fw->data; kld_fw_usable = fw_compatible(drv_fw, kld_fw); } else { kld_fw = NULL; kld_fw_usable = 0; } /* Read the header of the firmware on the card */ card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_read_flash(sc, FLASH_FW_START, sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1); if (rc == 0) { card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw); if (card_fw->fw_ver == be32toh(0xffffffff)) { uint32_t d = be32toh(kld_fw->fw_ver); if (!kld_fw_usable) { device_printf(sc->dev, "no firmware on the card and no usable " "firmware bundled with the driver.\n"); rc = EIO; goto done; } else if (t4_fw_install == 0) { device_printf(sc->dev, "no firmware on the card and the driver " "is prohibited from installing new " "firmware.\n"); rc = EIO; goto done; } device_printf(sc->dev, "no firmware on the card, " "installing firmware %d.%d.%d.%d\n", G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d)); rc = t4_fw_forceinstall(sc, fw->data, fw->datasize); if (rc < 0) { rc = -rc; device_printf(sc->dev, "firmware install failed: %d.\n", rc); goto done; } memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; } } else { device_printf(sc->dev, "Unable to read card's firmware header: %d\n", rc); card_fw_usable = 0; } /* Contact firmware. 
*/ rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state); if (rc < 0 || state == DEV_STATE_ERR) { rc = -rc; device_printf(sc->dev, "failed to connect to the firmware: %d, %d.\n", rc, state); goto done; } pf = rc; if (pf == sc->mbox) sc->flags |= MASTER_PF; else if (state == DEV_STATE_UNINIT) { /* * We didn't get to be the master so we definitely won't be * configuring the chip. It's a bug if someone else hasn't * configured it already. */ device_printf(sc->dev, "couldn't be master(%d), " "device not already initialized either(%d).\n", rc, state); rc = EPROTO; goto done; } if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver && (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) { /* * Common case: the firmware on the card is an exact match and * the KLD is an exact match too, or the KLD is * absent/incompatible. Note that t4_fw_install = 2 is ignored * here -- use cxgbetool loadfw if you want to reinstall the * same firmware as the one on the card. */ } else if (kld_fw_usable && state == DEV_STATE_UNINIT && should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver), be32toh(card_fw->fw_ver))) { rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0); if (rc != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", rc); goto done; } /* Installed successfully, update the cached header too. */ memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; /* already reset as part of load_fw */ } if (!card_fw_usable) { uint32_t d, c, k; d = ntohl(drv_fw->fw_ver); c = ntohl(card_fw->fw_ver); k = kld_fw ? ntohl(kld_fw->fw_ver) : 0; device_printf(sc->dev, "Cannot find a usable firmware: " "fw_install %d, chip state %d, " "driver compiled with %d.%d.%d.%d, " "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n", t4_fw_install, state, G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d), G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); rc = EINVAL; goto done; } /* Reset device */ if (need_fw_reset && (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) { device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) t4_fw_bye(sc, sc->mbox); goto done; } sc->flags |= FW_OK; rc = get_params__pre_init(sc); if (rc != 0) goto done; /* error message displayed already */ /* Partition adapter resources as specified in the config file. */ if (state == DEV_STATE_UNINIT) { KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); rc = partition_resources(sc, default_cfg, fw_info->kld_name); if (rc != 0) goto done; /* error message displayed already */ t4_tweak_chip_settings(sc); /* get basic stuff going */ rc = -t4_fw_initialize(sc, sc->mbox); if (rc != 0) { device_printf(sc->dev, "fw init failed: %d.\n", rc); goto done; } } else { snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf); sc->cfcsum = 0; } done: free(card_fw, M_CXGBE); if (fw != NULL) firmware_put(fw, FIRMWARE_UNLOAD); if (default_cfg != NULL) firmware_put(default_cfg, FIRMWARE_UNLOAD); return (rc); } #define FW_PARAM_DEV(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) #define FW_PARAM_PFVF(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) /* * Partition chip resources for use between various PFs, VFs, etc. 
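 *
 * For example (loader.conf, illustrative):
 *
 *	hw.cxgbe.config_file="uwire"
 *
 * makes the function below try to load a module named
 * "t4fw_cfg_uwire" (name_prefix + "_" + profile) before falling back
 * to the default config or to the copy on the card's flash.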
*/ static int partition_resources(struct adapter *sc, const struct firmware *default_cfg, const char *name_prefix) { const struct firmware *cfg = NULL; int rc = 0; struct fw_caps_config_cmd caps; uint32_t mtype, moff, finicsum, cfcsum; /* * Figure out what configuration file to use. Pick the default config * file for the card if the user hasn't specified one explicitly. */ snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file); if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) { /* Card specific overrides go here. */ if (pci_get_device(sc->dev) == 0x440a) snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF); if (is_fpga(sc)) snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF); } else if (strncmp(t4_cfg_file, BUILTIN_CF, sizeof(t4_cfg_file)) == 0) goto use_built_in_config; /* go straight to config. */ /* * We need to load another module if the profile is anything except * "default" or "flash". */ if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 && strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { char s[32]; snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file); cfg = firmware_get(s); if (cfg == NULL) { if (default_cfg != NULL) { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the default config file instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", DEFAULT_CF); } else { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the config file on the card's flash " "instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } } } if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 && default_cfg == NULL) { device_printf(sc->dev, "default config file not available, will use the config " "file on the card's flash instead.\n"); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { u_int cflen; const uint32_t *cfdata; uint32_t param, val, addr; KASSERT(cfg != NULL || default_cfg != NULL, ("%s: no config to upload", __func__)); /* * Ask the firmware where it wants us to upload the config file. */ param = FW_PARAM_DEV(CF); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); if (rc != 0) { /* No support for config file? Shouldn't happen. */ device_printf(sc->dev, "failed to query config file location: %d.\n", rc); goto done; } mtype = G_FW_PARAMS_PARAM_Y(val); moff = G_FW_PARAMS_PARAM_Z(val) << 16; /* * XXX: sheer laziness. We deliberately added 4 bytes of * useless stuffing/comments at the end of the config file so * it's ok to simply throw away the last remaining bytes when * the config file is not an exact multiple of 4. This also * helps with the validate_mt_off_len check. */ if (cfg != NULL) { cflen = cfg->datasize & ~3; cfdata = cfg->data; } else { cflen = default_cfg->datasize & ~3; cfdata = default_cfg->data; } if (cflen > FLASH_CFG_MAX_SIZE) { device_printf(sc->dev, "config file too long (%d, max allowed is %d). " "Will try to use the config on the card, if any.\n", cflen, FLASH_CFG_MAX_SIZE); goto use_config_on_flash; } rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr); if (rc != 0) { device_printf(sc->dev, "%s: addr (%d/0x%x) or len %d is not valid: %d.
" "Will try to use the config on the card, if any.\n", __func__, mtype, moff, cflen, rc); goto use_config_on_flash; } write_via_memwin(sc, 2, addr, cfdata, cflen); } else { use_config_on_flash: mtype = FW_MEMTYPE_FLASH; moff = t4_flash_cfg_addr(sc); } bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID | V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to pre-process config file: %d " "(mtype %d, moff 0x%x). Will reset the firmware and retry " "with the built-in configuration.\n", rc, mtype, moff); rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST); if (rc != 0) { device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) { t4_fw_bye(sc, sc->mbox); sc->flags &= ~FW_OK; } goto done; } snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", "built-in"); use_built_in_config: bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "built-in configuration failed: %d.\n", rc); goto done; } } finicsum = be32toh(caps.finicsum); cfcsum = be32toh(caps.cfcsum); if (finicsum != cfcsum) { device_printf(sc->dev, "WARNING: config file checksum mismatch: %08x %08x\n", finicsum, cfcsum); } sc->cfcsum = cfcsum; #define LIMIT_CAPS(x) do { \ caps.x &= htobe16(t4_##x##_allowed); \ } while (0) /* * Let the firmware know what features will (not) be used so it can tune * things accordingly. */ LIMIT_CAPS(nbmcaps); LIMIT_CAPS(linkcaps); LIMIT_CAPS(switchcaps); LIMIT_CAPS(niccaps); LIMIT_CAPS(toecaps); LIMIT_CAPS(rdmacaps); LIMIT_CAPS(cryptocaps); LIMIT_CAPS(iscsicaps); LIMIT_CAPS(fcoecaps); #undef LIMIT_CAPS if (caps.niccaps & htobe16(FW_CAPS_CONFIG_NIC_HASHFILTER)) { /* * TOE and hashfilters are mutually exclusive. It is a config * file or firmware bug if both are reported as available. Try * to cope with the situation in non-debug builds by disabling * TOE. */ MPASS(caps.toecaps == 0); caps.toecaps = 0; caps.rdmacaps = 0; caps.iscsicaps = 0; } caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL); if (rc != 0) { device_printf(sc->dev, "failed to process config file: %d.\n", rc); } done: if (cfg != NULL) firmware_put(cfg, FIRMWARE_UNLOAD); return (rc); } /* * Retrieve parameters that are needed (or nice to have) very early. 
*/ static int get_params__pre_init(struct adapter *sc) { int rc; uint32_t param[2], val[2]; t4_get_version_info(sc); snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers), G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers)); snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers), G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers), G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers), G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers)); snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers), G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers)); snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers), G_FW_HDR_FW_VER_MINOR(sc->params.er_vers), G_FW_HDR_FW_VER_MICRO(sc->params.er_vers), G_FW_HDR_FW_VER_BUILD(sc->params.er_vers)); param[0] = FW_PARAM_DEV(PORTVEC); param[1] = FW_PARAM_DEV(CCLK); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (pre_init): %d.\n", rc); return (rc); } sc->params.portvec = val[0]; sc->params.nports = bitcount32(val[0]); sc->params.vpd.cclk = val[1]; /* Read device log parameters. */ rc = -t4_init_devlog_params(sc, 1); if (rc == 0) fixup_devlog_params(sc); else { device_printf(sc->dev, "failed to get devlog parameters: %d.\n", rc); rc = 0; /* devlog isn't critical for device operation */ } return (rc); } /* * Retrieve various parameters that are of interest to the driver. The device * has been initialized by the firmware at this point. */ static int get_params__post_init(struct adapter *sc) { int rc; uint32_t param[7], val[7]; struct fw_caps_config_cmd caps; param[0] = FW_PARAM_PFVF(IQFLINT_START); param[1] = FW_PARAM_PFVF(EQ_START); param[2] = FW_PARAM_PFVF(FILTER_START); param[3] = FW_PARAM_PFVF(FILTER_END); param[4] = FW_PARAM_PFVF(L2T_START); param[5] = FW_PARAM_PFVF(L2T_END); param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (post_init): %d.\n", rc); return (rc); } sc->sge.iq_start = val[0]; sc->sge.eq_start = val[1]; if (val[3] > val[2]) { sc->tids.ftid_base = val[2]; sc->tids.ftid_end = val[3]; sc->tids.nftids = val[3] - val[2] + 1; } sc->vres.l2t.start = val[4]; sc->vres.l2t.size = val[5] - val[4] + 1; KASSERT(sc->vres.l2t.size <= L2T_SIZE, ("%s: L2 table size (%u) larger than expected (%u)", __func__, sc->vres.l2t.size, L2T_SIZE)); sc->params.core_vdd = val[6]; if (chip_id(sc) >= CHELSIO_T6) { #ifdef INVARIANTS if (sc->params.fw_vers >= (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) | V_FW_HDR_FW_VER_MICRO(1) | V_FW_HDR_FW_VER_BUILD(0))) { /* * Note that the code to enable the region should run * before t4_fw_initialize and not here. This is just a * reminder to add said code. 
*/ device_printf(sc->dev, "hpfilter region not enabled.\n"); } #endif sc->tids.tid_base = t4_read_reg(sc, A_LE_DB_ACTIVE_TABLE_START_INDEX); param[0] = FW_PARAM_PFVF(HPFILTER_START); param[1] = FW_PARAM_PFVF(HPFILTER_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query hpfilter parameters: %d.\n", rc); return (rc); } if ((int)val[1] > (int)val[0]) { sc->tids.hpftid_base = val[0]; sc->tids.hpftid_end = val[1]; sc->tids.nhpftids = val[1] - val[0] + 1; /* * These should go off if the layout changes and the * driver needs to catch up. */ MPASS(sc->tids.hpftid_base == 0); MPASS(sc->tids.tid_base == sc->tids.nhpftids); } } /* * MPSBGMAP is queried separately because only recent firmwares support * it as a parameter and we don't want the compound query above to fail * on older firmwares. */ param[0] = FW_PARAM_DEV(MPSBGMAP); val[0] = 0; rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val); if (rc == 0) sc->params.mps_bg_map = val[0]; else sc->params.mps_bg_map = 0; /* * Determine whether the firmware supports the filter2 work request. * This is queried separately for the same reason as MPSBGMAP above. */ param[0] = FW_PARAM_DEV(FILTER2_WR); val[0] = 0; rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val); if (rc == 0) sc->params.filter2_wr_support = val[0] != 0; else sc->params.filter2_wr_support = 0; /* get capabilites */ bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to get card capabilities: %d.\n", rc); return (rc); } #define READ_CAPS(x) do { \ sc->x = htobe16(caps.x); \ } while (0) READ_CAPS(nbmcaps); READ_CAPS(linkcaps); READ_CAPS(switchcaps); READ_CAPS(niccaps); READ_CAPS(toecaps); READ_CAPS(rdmacaps); READ_CAPS(cryptocaps); READ_CAPS(iscsicaps); READ_CAPS(fcoecaps); if (sc->niccaps & FW_CAPS_CONFIG_NIC_HASHFILTER) { MPASS(chip_id(sc) > CHELSIO_T4); MPASS(sc->toecaps == 0); sc->toecaps = 0; param[0] = FW_PARAM_DEV(NTID); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query HASHFILTER parameters: %d.\n", rc); return (rc); } sc->tids.ntids = val[0]; if (sc->params.fw_vers < (V_FW_HDR_FW_VER_MAJOR(1) | V_FW_HDR_FW_VER_MINOR(20) | V_FW_HDR_FW_VER_MICRO(5) | V_FW_HDR_FW_VER_BUILD(0))) { MPASS(sc->tids.ntids >= sc->tids.nhpftids); sc->tids.ntids -= sc->tids.nhpftids; } sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); sc->params.hash_filter = 1; } if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) { param[0] = FW_PARAM_PFVF(ETHOFLD_START); param[1] = FW_PARAM_PFVF(ETHOFLD_END); param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query NIC parameters: %d.\n", rc); return (rc); } if (val[1] > val[0]) { sc->tids.etid_base = val[0]; sc->tids.etid_end = val[1]; sc->tids.netids = val[1] - val[0] + 1; sc->params.eo_wr_cred = val[2]; sc->params.ethoffload = 1; } } if (sc->toecaps) { /* query offload-related parameters */ param[0] = FW_PARAM_DEV(NTID); param[1] = FW_PARAM_PFVF(SERVER_START); param[2] = FW_PARAM_PFVF(SERVER_END); param[3] = FW_PARAM_PFVF(TDDP_START); param[4] = FW_PARAM_PFVF(TDDP_END); param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { 
device_printf(sc->dev, "failed to query TOE parameters: %d.\n", rc); return (rc); } sc->tids.ntids = val[0]; sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); if (val[2] > val[1]) { sc->tids.stid_base = val[1]; sc->tids.nstids = val[2] - val[1] + 1; } sc->vres.ddp.start = val[3]; sc->vres.ddp.size = val[4] - val[3] + 1; sc->params.ofldq_wr_cred = val[5]; sc->params.offload = 1; } else { /* * The firmware attempts memfree TOE configuration for -SO cards * and will report toecaps=0 if it runs out of resources (this * depends on the config file). It may not report 0 for other * capabilities dependent on the TOE in this case. Set them to * 0 here so that the driver doesn't bother tracking resources * that will never be used. */ sc->iscsicaps = 0; sc->rdmacaps = 0; } if (sc->rdmacaps) { param[0] = FW_PARAM_PFVF(STAG_START); param[1] = FW_PARAM_PFVF(STAG_END); param[2] = FW_PARAM_PFVF(RQ_START); param[3] = FW_PARAM_PFVF(RQ_END); param[4] = FW_PARAM_PFVF(PBL_START); param[5] = FW_PARAM_PFVF(PBL_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(1): %d.\n", rc); return (rc); } sc->vres.stag.start = val[0]; sc->vres.stag.size = val[1] - val[0] + 1; sc->vres.rq.start = val[2]; sc->vres.rq.size = val[3] - val[2] + 1; sc->vres.pbl.start = val[4]; sc->vres.pbl.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SQRQ_START); param[1] = FW_PARAM_PFVF(SQRQ_END); param[2] = FW_PARAM_PFVF(CQ_START); param[3] = FW_PARAM_PFVF(CQ_END); param[4] = FW_PARAM_PFVF(OCQ_START); param[5] = FW_PARAM_PFVF(OCQ_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(2): %d.\n", rc); return (rc); } sc->vres.qp.start = val[0]; sc->vres.qp.size = val[1] - val[0] + 1; sc->vres.cq.start = val[2]; sc->vres.cq.size = val[3] - val[2] + 1; sc->vres.ocq.start = val[4]; sc->vres.ocq.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SRQ_START); param[1] = FW_PARAM_PFVF(SRQ_END); param[2] = FW_PARAM_DEV(MAXORDIRD_QP); param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(3): %d.\n", rc); return (rc); } sc->vres.srq.start = val[0]; sc->vres.srq.size = val[1] - val[0] + 1; sc->params.max_ordird_qp = val[2]; sc->params.max_ird_adapter = val[3]; } if (sc->iscsicaps) { param[0] = FW_PARAM_PFVF(ISCSI_START); param[1] = FW_PARAM_PFVF(ISCSI_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query iSCSI parameters: %d.\n", rc); return (rc); } sc->vres.iscsi.start = val[0]; sc->vres.iscsi.size = val[1] - val[0] + 1; } if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS) { param[0] = FW_PARAM_PFVF(TLS_START); param[1] = FW_PARAM_PFVF(TLS_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TLS parameters: %d.\n", rc); return (rc); } sc->vres.key.start = val[0]; sc->vres.key.size = val[1] - val[0] + 1; } t4_init_sge_params(sc); /* * We've got the params we wanted to query via the firmware. Now grab * some others directly from the chip. 
*/ rc = t4_read_chip_settings(sc); return (rc); } static int set_params__post_init(struct adapter *sc) { uint32_t param, val; #ifdef TCP_OFFLOAD int i, v, shift; #endif /* ask for encapsulated CPLs */ param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP); val = 1; (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); #ifdef TCP_OFFLOAD /* * Override the TOE timers with user-provided tunables. This is not the * recommended way to change the timers (the firmware config file is) so * these tunables are not documented. * * All the timer tunables are in microseconds. */ if (t4_toe_keepalive_idle != 0) { v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle); v &= M_KEEPALIVEIDLE; t4_set_reg_field(sc, A_TP_KEEP_IDLE, V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v)); } if (t4_toe_keepalive_interval != 0) { v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval); v &= M_KEEPALIVEINTVL; t4_set_reg_field(sc, A_TP_KEEP_INTVL, V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v)); } if (t4_toe_keepalive_count != 0) { v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2; t4_set_reg_field(sc, A_TP_SHIFT_CNT, V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) | V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2), V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v)); } if (t4_toe_rexmt_min != 0) { v = us_to_tcp_ticks(sc, t4_toe_rexmt_min); v &= M_RXTMIN; t4_set_reg_field(sc, A_TP_RXT_MIN, V_RXTMIN(M_RXTMIN), V_RXTMIN(v)); } if (t4_toe_rexmt_max != 0) { v = us_to_tcp_ticks(sc, t4_toe_rexmt_max); v &= M_RXTMAX; t4_set_reg_field(sc, A_TP_RXT_MAX, V_RXTMAX(M_RXTMAX), V_RXTMAX(v)); } if (t4_toe_rexmt_count != 0) { v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2; t4_set_reg_field(sc, A_TP_SHIFT_CNT, V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) | V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2), V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v)); } for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) { if (t4_toe_rexmt_backoff[i] != -1) { v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0; shift = (i & 3) << 3; t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3), M_TIMERBACKOFFINDEX0 << shift, v << shift); } } #endif return (0); } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV static void t4_set_desc(struct adapter *sc) { char buf[128]; struct adapter_params *p = &sc->params; snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id); device_set_desc_copy(sc->dev, buf); } static inline void ifmedia_add4(struct ifmedia *ifm, int m) { ifmedia_add(ifm, m, 0, NULL); ifmedia_add(ifm, m | IFM_ETH_TXPAUSE, 0, NULL); ifmedia_add(ifm, m | IFM_ETH_RXPAUSE, 0, NULL); ifmedia_add(ifm, m | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE, 0, NULL); } static void set_current_media(struct port_info *pi, struct ifmedia *ifm) { struct link_config *lc; int mword; PORT_LOCK_ASSERT_OWNED(pi); /* Leave current media alone if it's already set to IFM_NONE. */ if (ifm->ifm_cur != NULL && IFM_SUBTYPE(ifm->ifm_cur->ifm_media) == IFM_NONE) return; lc = &pi->link_cfg; if (lc->requested_aneg == AUTONEG_ENABLE && lc->supported & FW_PORT_CAP_ANEG) { ifmedia_set(ifm, IFM_ETHER | IFM_AUTO); return; } mword = IFM_ETHER | IFM_FDX; if (lc->requested_fc & PAUSE_TX) mword |= IFM_ETH_TXPAUSE; if (lc->requested_fc & PAUSE_RX) mword |= IFM_ETH_RXPAUSE; mword |= port_mword(pi, speed_to_fwspeed(lc->requested_speed)); ifmedia_set(ifm, mword); } static void build_medialist(struct port_info *pi, struct ifmedia *ifm) { uint16_t ss, speed; int unknown, mword, bit; struct link_config *lc; PORT_LOCK_ASSERT_OWNED(pi); if (pi->flags & FIXED_IFMEDIA) return; /* * First set up all the requested_ fields so that they comply with what's * supported by the port + transceiver.
Note that this clobbers any * user preferences set via sysctl_pause_settings or sysctl_autoneg. */ init_l1cfg(pi); /* * Now (re)build the ifmedia list. */ ifmedia_removeall(ifm); lc = &pi->link_cfg; ss = G_FW_PORT_CAP_SPEED(lc->supported); /* Supported Speeds */ if (__predict_false(ss == 0)) { /* not supposed to happen. */ MPASS(ss != 0); no_media: MPASS(LIST_EMPTY(&ifm->ifm_list)); ifmedia_add(ifm, IFM_ETHER | IFM_NONE, 0, NULL); ifmedia_set(ifm, IFM_ETHER | IFM_NONE); return; } unknown = 0; for (bit = 0; bit < fls(ss); bit++) { speed = 1 << bit; MPASS(speed & M_FW_PORT_CAP_SPEED); if (ss & speed) { mword = port_mword(pi, speed); if (mword == IFM_NONE) { goto no_media; } else if (mword == IFM_UNKNOWN) unknown++; else ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | mword); } } if (unknown > 0) /* Add one unknown for all unknown media types. */ ifmedia_add4(ifm, IFM_ETHER | IFM_FDX | IFM_UNKNOWN); if (lc->supported & FW_PORT_CAP_ANEG) ifmedia_add(ifm, IFM_ETHER | IFM_AUTO, 0, NULL); set_current_media(pi, ifm); } /* * Update all the requested_* fields in the link config to something valid (and * reasonable). */ static void init_l1cfg(struct port_info *pi) { struct link_config *lc = &pi->link_cfg; PORT_LOCK_ASSERT_OWNED(pi); /* Gbps -> Mbps */ lc->requested_speed = port_top_speed(pi) * 1000; if (t4_autoneg != 0 && lc->supported & FW_PORT_CAP_ANEG) { lc->requested_aneg = AUTONEG_ENABLE; } else { lc->requested_aneg = AUTONEG_DISABLE; } lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX); if (t4_fec != -1) { if (t4_fec & FEC_RS && lc->supported & FW_PORT_CAP_FEC_RS) { lc->requested_fec = FEC_RS; } else if (t4_fec & FEC_BASER_RS && lc->supported & FW_PORT_CAP_FEC_BASER_RS) { lc->requested_fec = FEC_BASER_RS; } else { lc->requested_fec = 0; } } else { /* Use the suggested value provided by the firmware in acaps */ if (lc->advertising & FW_PORT_CAP_FEC_RS && lc->supported & FW_PORT_CAP_FEC_RS) { lc->requested_fec = FEC_RS; } else if (lc->advertising & FW_PORT_CAP_FEC_BASER_RS && lc->supported & FW_PORT_CAP_FEC_BASER_RS) { lc->requested_fec = FEC_BASER_RS; } else { lc->requested_fec = 0; } } } /* * Apply the settings in requested_* to the hardware. The parameters are * expected to be sane. */ static int apply_l1cfg(struct port_info *pi) { struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; #ifdef INVARIANTS uint16_t fwspeed; ASSERT_SYNCHRONIZED_OP(sc); PORT_LOCK_ASSERT_OWNED(pi); if (lc->requested_aneg == AUTONEG_ENABLE) MPASS(lc->supported & FW_PORT_CAP_ANEG); if (lc->requested_fc & PAUSE_TX) MPASS(lc->supported & FW_PORT_CAP_FC_TX); if (lc->requested_fc & PAUSE_RX) MPASS(lc->supported & FW_PORT_CAP_FC_RX); if (lc->requested_fec == FEC_RS) MPASS(lc->supported & FW_PORT_CAP_FEC_RS); if (lc->requested_fec == FEC_BASER_RS) MPASS(lc->supported & FW_PORT_CAP_FEC_BASER_RS); fwspeed = speed_to_fwspeed(lc->requested_speed); MPASS(fwspeed != 0); MPASS(lc->supported & fwspeed); #endif rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc != 0) { device_printf(pi->dev, "l1cfg failed: %d\n", rc); } else { lc->fc = lc->requested_fc; lc->fec = lc->requested_fec; } return (rc); } #define FW_MAC_EXACT_CHUNK 7 /* * Program the port's XGMAC based on parameters in ifnet. The caller also * indicates which parameters should be programmed (the rest are left alone). 
*/ int update_mac_settings(struct ifnet *ifp, int flags) { int rc = 0; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; ASSERT_SYNCHRONIZED_OP(sc); KASSERT(flags, ("%s: not told what to update.", __func__)); if (flags & XGMAC_MTU) mtu = ifp->if_mtu; if (flags & XGMAC_PROMISC) promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0; if (flags & XGMAC_ALLMULTI) allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0; if (flags & XGMAC_VLANEX) vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0; if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) { rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc, allmulti, 1, vlanex, false); if (rc) { if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc); return (rc); } } if (flags & XGMAC_UCADDR) { uint8_t ucaddr[ETHER_ADDR_LEN]; bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt, ucaddr, true, true); if (rc < 0) { rc = -rc; if_printf(ifp, "change_mac failed: %d\n", rc); return (rc); } else { vi->xact_addr_filt = rc; rc = 0; } } if (flags & XGMAC_MCADDRS) { const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK]; int del = 1; uint64_t hash = 0; struct ifmultiaddr *ifma; int i = 0, j; if_maddr_rlock(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcaddr[i] = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); MPASS(ETHER_IS_MULTICAST(mcaddr[i])); i++; if (i == FW_MAC_EXACT_CHUNK) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } del = 0; i = 0; } } if (i > 0) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } } rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0); if (rc != 0) if_printf(ifp, "failed to set mc address hash: %d", rc); mcfail: if_maddr_runlock(ifp); } return (rc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ int begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags, char *wmesg) { int rc, pri; #ifdef WITNESS /* the caller thinks it's ok to sleep, but is it really? */ if (flags & SLEEP_OK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "begin_synchronized_op"); #endif if (flags & INTR_OK) pri = PCATCH; else pri = 0; ADAPTER_LOCK(sc); for (;;) { if (vi && IS_DOOMED(vi)) { rc = ENXIO; goto done; } if (!IS_BUSY(sc)) { rc = 0; break; } if (!(flags & SLEEP_OK)) { rc = EBUSY; goto done; } if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) { rc = EINTR; goto done; } } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = wmesg; sc->last_op_thr = curthread; sc->last_op_flags = flags; #endif done: if (!(flags & HOLD_LOCK) || rc) ADAPTER_UNLOCK(sc); return (rc); } /* * Tell if_ioctl and if_init that the VI is going away. This is a * special variant of begin_synchronized_op and must be paired with a * call to end_synchronized_op.
*/ void doom_vi(struct adapter *sc, struct vi_info *vi) { ADAPTER_LOCK(sc); SET_DOOMED(vi); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = "t4detach"; sc->last_op_thr = curthread; sc->last_op_flags = 0; #endif ADAPTER_UNLOCK(sc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ void end_synchronized_op(struct adapter *sc, int flags) { if (flags & LOCK_HELD) ADAPTER_LOCK_ASSERT_OWNED(sc); else ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup(&sc->flags); ADAPTER_UNLOCK(sc); } static int cxgbe_init_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc = 0, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return (0); /* already running */ if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) return (rc); /* error message displayed already */ if (!(vi->flags & VI_INIT_DONE) && ((rc = vi_full_init(vi)) != 0)) return (rc); /* error message displayed already */ rc = update_mac_settings(ifp, XGMAC_ALL); if (rc) goto done; /* error message displayed already */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true); if (rc != 0) { if_printf(ifp, "enable_vi failed: %d\n", rc); goto done; } /* * Can't fail from this point onwards. Review cxgbe_uninit_synchronized * if this changes. */ for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_ENABLED; TXQ_UNLOCK(txq); } /* * The first iq of the first port to come up is used for tracing. */ if (sc->traceq < 0 && IS_MAIN_VI(vi)) { sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id; t4_write_reg(sc, is_t4(sc) ? A_MPS_TRC_RSS_CONTROL : A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) | V_QUEUENUMBER(sc->traceq)); pi->flags |= HAS_TRACEQ; } /* all ok */ PORT_LOCK(pi); if (pi->up_vis++ == 0) { t4_update_port_info(pi); build_medialist(pi, &pi->media); apply_l1cfg(pi); } ifp->if_drv_flags |= IFF_DRV_RUNNING; if (pi->nvi > 1 || sc->flags & IS_VF) callout_reset(&vi->tick, hz, vi_tick, vi); else callout_reset(&pi->tick, hz, cxgbe_tick, pi); PORT_UNLOCK(pi); done: if (rc != 0) cxgbe_uninit_synchronized(vi); return (rc); } /* * Idempotent. */ static int cxgbe_uninit_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (!(vi->flags & VI_INIT_DONE)) { if (__predict_false(ifp->if_drv_flags & IFF_DRV_RUNNING)) { KASSERT(0, ("uninited VI is running")); if_printf(ifp, "uninited VI with running ifnet. " "vi->flags 0x%016lx, if_flags 0x%08x, " "if_drv_flags 0x%08x\n", vi->flags, ifp->if_flags, ifp->if_drv_flags); } return (0); } /* * Disable the VI so that all its data in either direction is discarded * by the MPS. Leave everything else (the queues, interrupts, and 1Hz * tick) intact as the TP can deliver negative advice or data that it's * holding in its RAM (for an offloaded connection) even after the VI is * disabled. 
*/ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false); if (rc) { if_printf(ifp, "disable_vi failed: %d\n", rc); return (rc); } for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); } PORT_LOCK(pi); if (pi->nvi > 1 || sc->flags & IS_VF) callout_stop(&vi->tick); else callout_stop(&pi->tick); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(pi); return (0); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; pi->up_vis--; if (pi->up_vis > 0) { PORT_UNLOCK(pi); return (0); } pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; pi->link_cfg.link_down_rc = 255; t4_os_link_changed(pi); pi->old_link_cfg = pi->link_cfg; PORT_UNLOCK(pi); return (0); } /* * It is ok for this function to fail midway and return right away. t4_detach * will walk the entire sc->irq list and clean up whatever is valid. */ int t4_setup_intr_handlers(struct adapter *sc) { int rc, rid, p, q, v; char s[8]; struct irq *irq; struct port_info *pi; struct vi_info *vi; struct sge *sge = &sc->sge; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; #endif #ifdef RSS int nbuckets = rss_getnumbuckets(); #endif /* * Setup interrupts. */ irq = &sc->irq[0]; rid = sc->intr_type == INTR_INTX ? 0 : 1; if (forwarding_intr_to_fwq(sc)) return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all")); /* Multiple interrupts. */ if (sc->flags & IS_VF) KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); else KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); /* The first one is always error intr on PFs */ if (!(sc->flags & IS_VF)) { rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err"); if (rc != 0) return (rc); irq++; rid++; } /* The second one is always the firmware event queue (first on VFs) */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt"); if (rc != 0) return (rc); irq++; rid++; for_each_port(sc, p) { pi = sc->port[p]; for_each_vi(pi, v, vi) { vi->first_intr = rid - 1; if (vi->nnmrxq > 0) { int n = max(vi->nrxq, vi->nnmrxq); rxq = &sge->rxq[vi->first_rxq]; #ifdef DEV_NETMAP nm_rxq = &sge->nm_rxq[vi->first_nm_rxq]; #endif for (q = 0; q < n; q++) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); if (q < vi->nrxq) irq->rxq = rxq++; #ifdef DEV_NETMAP if (q < vi->nnmrxq) irq->nm_rxq = nm_rxq++; + + if (irq->nm_rxq != NULL && + irq->rxq == NULL) { + /* Netmap rx only */ + rc = t4_alloc_irq(sc, irq, rid, + t4_nm_intr, irq->nm_rxq, s); + } + if (irq->nm_rxq != NULL && + irq->rxq != NULL) { + /* NIC and Netmap rx */ + rc = t4_alloc_irq(sc, irq, rid, + t4_vi_intr, irq, s); + } #endif - rc = t4_alloc_irq(sc, irq, rid, - t4_vi_intr, irq, s); + if (irq->rxq != NULL && + irq->nm_rxq == NULL) { + /* NIC rx only */ + rc = t4_alloc_irq(sc, irq, rid, + t4_intr, irq->rxq, s); + } if (rc != 0) return (rc); #ifdef RSS if (q < vi->nrxq) { bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); } #endif irq++; rid++; vi->nintr++; } } else { for_each_rxq(vi, q, rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, rxq, s); if (rc != 0) return (rc); #ifdef RSS bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); #endif irq++; rid++; vi->nintr++; } } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, q, ofld_rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, ofld_rxq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } #endif } } MPASS(irq == 
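/*
 * The three if-blocks added above pick the interrupt handler for each rx
 * vector instead of the previous unconditional t4_vi_intr() setup.
 * Restated compactly (an illustrative summary only, not additional
 * driver code):
 *
 *	if (irq->rxq == NULL)		// netmap rx only
 *		handler = t4_nm_intr, arg = irq->nm_rxq;
 *	else if (irq->nm_rxq == NULL)	// NIC rx only
 *		handler = t4_intr, arg = irq->rxq;
 *	else				// NIC and netmap rx share the vector
 *		handler = t4_vi_intr, arg = irq;
 */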
&sc->irq[sc->intr_count]); return (0); } int adapter_full_init(struct adapter *sc) { int rc, i; #ifdef RSS uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; #endif ASSERT_SYNCHRONIZED_OP(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); KASSERT((sc->flags & FULL_INIT_DONE) == 0, ("%s: FULL_INIT_DONE already", __func__)); /* * queues that belong to the adapter (not any particular port). */ rc = t4_setup_adapter_queues(sc); if (rc != 0) goto done; for (i = 0; i < nitems(sc->tq); i++) { sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq[i]); if (sc->tq[i] == NULL) { device_printf(sc->dev, "failed to allocate task queue %d\n", i); rc = ENOMEM; goto done; } taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d", device_get_nameunit(sc->dev), i); } #ifdef RSS MPASS(RSS_KEYSIZE == 40); rss_getkey((void *)&raw_rss_key[0]); for (i = 0; i < nitems(rss_key); i++) { rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]); } t4_write_rss_key(sc, &rss_key[0], -1, 1); #endif if (!(sc->flags & IS_VF)) t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; done: if (rc != 0) adapter_full_uninit(sc); return (rc); } int adapter_full_uninit(struct adapter *sc) { int i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); t4_teardown_adapter_queues(sc); for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) { taskqueue_free(sc->tq[i]); sc->tq[i] = NULL; } sc->flags &= ~FULL_INIT_DONE; return (0); } #ifdef RSS #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \ RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \ RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \ RSS_HASHTYPE_RSS_UDP_IPV6) /* Translates kernel hash types to hardware. */ static int hashconfig_to_hashen(int hashconfig) { int hashen = 0; if (hashconfig & RSS_HASHTYPE_RSS_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; return (hashen); } /* Translates hardware hash types to kernel. */ static int hashen_to_hashconfig(int hashen) { int hashconfig = 0; if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) { /* * If UDP hashing was enabled it must have been enabled for * either IPv4 or IPv6 (inclusive or). Enabling UDP without * enabling any 4-tuple hash is nonsense configuration. 
*/ MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)); if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6; } if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV6; return (hashconfig); } #endif int vi_full_init(struct vi_info *vi) { struct adapter *sc = vi->pi->adapter; struct ifnet *ifp = vi->ifp; uint16_t *rss; struct sge_rxq *rxq; int rc, i, j, hashen; #ifdef RSS int nbuckets = rss_getnumbuckets(); int hashconfig = rss_gethashconfig(); int extra; #endif ASSERT_SYNCHRONIZED_OP(sc); KASSERT((vi->flags & VI_INIT_DONE) == 0, ("%s: VI_INIT_DONE already", __func__)); sysctl_ctx_init(&vi->ctx); vi->flags |= VI_SYSCTL_CTX; /* * Allocate tx/rx/fl queues for this VI. */ rc = t4_setup_vi_queues(vi); if (rc != 0) goto done; /* error message displayed already */ /* * Setup RSS for this VI. Save a copy of the RSS table for later use. */ if (vi->nrxq > vi->rss_size) { if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); " "some queues will never receive traffic.\n", vi->nrxq, vi->rss_size); } else if (vi->rss_size % vi->nrxq) { if_printf(ifp, "nrxq (%d), hw RSS table size (%d); " "expect uneven traffic distribution.\n", vi->nrxq, vi->rss_size); } #ifdef RSS if (vi->nrxq != nbuckets) { if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d); " "performance will be impacted.\n", vi->nrxq, nbuckets); } #endif rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); for (i = 0; i < vi->rss_size;) { #ifdef RSS j = rss_get_indirection_to_bucket(i); j %= vi->nrxq; rxq = &sc->sge.rxq[vi->first_rxq + j]; rss[i++] = rxq->iq.abs_id; #else for_each_rxq(vi, j, rxq) { rss[i++] = rxq->iq.abs_id; if (i == vi->rss_size) break; } #endif } rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, vi->rss_size); if (rc != 0) { if_printf(ifp, "rss_config failed: %d\n", rc); goto done; } #ifdef RSS hashen = hashconfig_to_hashen(hashconfig); /* * We may have had to enable some hashes even though the global config * wants them disabled. This is a potential problem that must be * reported to the user. */ extra = hashen_to_hashconfig(hashen) ^ hashconfig; /* * If we consider only the supported hash types, then the enabled hashes * are a superset of the requested hashes. In other words, there cannot * be any supported hash that was requested but not enabled, but there * can be hashes that were not requested but had to be enabled.
*/ extra &= SUPPORTED_RSS_HASHTYPES; MPASS((extra & hashconfig) == 0); if (extra) { if_printf(ifp, "global RSS config (0x%x) cannot be accommodated.\n", hashconfig); } if (extra & RSS_HASHTYPE_RSS_IPV4) if_printf(ifp, "IPv4 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV4) if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_IPV6) if_printf(ifp, "IPv6 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV6) if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV4) if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV6) if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n"); #else hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN; #endif rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0], 0, 0); if (rc != 0) { if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc); goto done; } vi->rss = rss; vi->flags |= VI_INIT_DONE; done: if (rc != 0) vi_full_uninit(vi); return (rc); } /* * Idempotent. */ int vi_full_uninit(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int i; struct sge_rxq *rxq; struct sge_txq *txq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif if (vi->flags & VI_INIT_DONE) { /* Need to quiesce queues. */ /* XXX: Only for the first VI? */ if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF)) quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { quiesce_txq(sc, txq); } #ifdef TCP_OFFLOAD for_each_ofld_txq(vi, i, ofld_txq) { quiesce_wrq(sc, ofld_txq); } #endif for_each_rxq(vi, i, rxq) { quiesce_iq(sc, &rxq->iq); quiesce_fl(sc, &rxq->fl); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { quiesce_iq(sc, &ofld_rxq->iq); quiesce_fl(sc, &ofld_rxq->fl); } #endif free(vi->rss, M_CXGBE); free(vi->nm_rss, M_CXGBE); } t4_teardown_vi_queues(vi); vi->flags &= ~VI_INIT_DONE; return (0); } static void quiesce_txq(struct adapter *sc, struct sge_txq *txq) { struct sge_eq *eq = &txq->eq; struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; (void) sc; /* unused */ #ifdef INVARIANTS TXQ_LOCK(txq); MPASS((eq->flags & EQ_ENABLED) == 0); TXQ_UNLOCK(txq); #endif /* Wait for the mp_ring to empty. */ while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("rquiesce", 1); } /* Then wait for the hardware to finish. */ while (spg->cidx != htobe16(eq->pidx)) pause("equiesce", 1); /* Finally, wait for the driver to reclaim all descriptors. 
*/ while (eq->cidx != eq->pidx) pause("dquiesce", 1); } static void quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq) { /* XXXTX */ } static void quiesce_iq(struct adapter *sc, struct sge_iq *iq) { (void) sc; /* unused */ /* Synchronize with the interrupt handler */ while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED)) pause("iqfree", 1); } static void quiesce_fl(struct adapter *sc, struct sge_fl *fl) { mtx_lock(&sc->sfl_lock); FL_LOCK(fl); fl->flags |= FL_DOOMED; FL_UNLOCK(fl); callout_stop(&sc->sfl_callout); mtx_unlock(&sc->sfl_lock); KASSERT((fl->flags & FL_STARVING) == 0, ("%s: still starving", __func__)); } static int t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid, driver_intr_t *handler, void *arg, char *name) { int rc; irq->rid = rid; irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid, RF_SHAREABLE | RF_ACTIVE); if (irq->res == NULL) { device_printf(sc->dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET, NULL, handler, arg, &irq->tag); if (rc != 0) { device_printf(sc->dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name, rc); } else if (name) bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name); return (rc); } static int t4_free_irq(struct adapter *sc, struct irq *irq) { if (irq->tag) bus_teardown_intr(sc->dev, irq->res, irq->tag); if (irq->res) bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res); bzero(irq, sizeof(*irq)); return (0); } static void get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf) { regs->version = chip_id(sc) | chip_rev(sc) << 10; t4_get_regs(sc, buf, regs->len); } #define A_PL_INDIR_CMD 0x1f8 #define S_PL_AUTOINC 31 #define M_PL_AUTOINC 0x1U #define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC) #define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC) #define S_PL_VFID 20 #define M_PL_VFID 0xffU #define V_PL_VFID(x) ((x) << S_PL_VFID) #define G_PL_VFID(x) (((x) >> S_PL_VFID) & M_PL_VFID) #define S_PL_ADDR 0 #define M_PL_ADDR 0xfffffU #define V_PL_ADDR(x) ((x) << S_PL_ADDR) #define G_PL_ADDR(x) (((x) >> S_PL_ADDR) & M_PL_ADDR) #define A_PL_INDIR_DATA 0x1fc static uint64_t read_vf_stat(struct adapter *sc, unsigned int viid, int reg) { u32 stats[2]; mtx_assert(&sc->reg_lock, MA_OWNED); if (sc->flags & IS_VF) { stats[0] = t4_read_reg(sc, VF_MPS_REG(reg)); stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4)); } else { t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg))); stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); } return (((uint64_t)stats[1]) << 32 | stats[0]); } static void t4_get_vi_stats(struct adapter *sc, unsigned int viid, struct fw_vi_stats_vf *stats) { #define GET_STAT(name) \ read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L) stats->tx_bcast_bytes = GET_STAT(TX_VF_BCAST_BYTES); stats->tx_bcast_frames = GET_STAT(TX_VF_BCAST_FRAMES); stats->tx_mcast_bytes = GET_STAT(TX_VF_MCAST_BYTES); stats->tx_mcast_frames = GET_STAT(TX_VF_MCAST_FRAMES); stats->tx_ucast_bytes = GET_STAT(TX_VF_UCAST_BYTES); stats->tx_ucast_frames = GET_STAT(TX_VF_UCAST_FRAMES); stats->tx_drop_frames = GET_STAT(TX_VF_DROP_FRAMES); stats->tx_offload_bytes = GET_STAT(TX_VF_OFFLOAD_BYTES); stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES); stats->rx_bcast_bytes = GET_STAT(RX_VF_BCAST_BYTES); stats->rx_bcast_frames = GET_STAT(RX_VF_BCAST_FRAMES); stats->rx_mcast_bytes = GET_STAT(RX_VF_MCAST_BYTES); 
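/*
 * Each GET_STAT() above is shorthand for a read_vf_stat() call on the _L
 * (low) register of a 64-bit counter pair.  read_vf_stat() fetches the
 * two 32-bit halves back to back (through the auto-incrementing
 * A_PL_INDIR window when running as a PF) and merges them, so a single
 * expansion looks like this (illustrative expansion of the macro, not
 * additional driver code):
 *
 *	stats->tx_bcast_bytes =
 *	    read_vf_stat(sc, viid, A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L);
 *	// read_vf_stat() returns ((uint64_t)stats[1] << 32 | stats[0]),
 *	// i.e. the high half shifted above the low half.
 */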
stats->rx_mcast_frames = GET_STAT(RX_VF_MCAST_FRAMES); stats->rx_ucast_bytes = GET_STAT(RX_VF_UCAST_BYTES); stats->rx_ucast_frames = GET_STAT(RX_VF_UCAST_FRAMES); stats->rx_err_frames = GET_STAT(RX_VF_ERR_FRAMES); #undef GET_STAT } static void t4_clr_vi_stats(struct adapter *sc, unsigned int viid) { int reg; t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L))); for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L; reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4) t4_write_reg(sc, A_PL_INDIR_DATA, 0); } static void vi_refresh_stats(struct adapter *sc, struct vi_info *vi) { struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ if (!(vi->flags & VI_INIT_DONE)) return; getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &vi->last_refreshed, <)) return; mtx_lock(&sc->reg_lock); t4_get_vi_stats(sc, vi->viid, &vi->stats); getmicrotime(&vi->last_refreshed); mtx_unlock(&sc->reg_lock); } static void cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi) { u_int i, v, tnl_cong_drops, bg_map; struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &pi->last_refreshed, <)) return; tnl_cong_drops = 0; t4_get_port_stats(sc, pi->tx_chan, &pi->stats); bg_map = pi->mps_bg_map; while (bg_map) { i = ffs(bg_map) - 1; mtx_lock(&sc->reg_lock); t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1, A_TP_MIB_TNL_CNG_DROP_0 + i); mtx_unlock(&sc->reg_lock); tnl_cong_drops += v; bg_map &= ~(1 << i); } pi->tnl_cong_drops = tnl_cong_drops; getmicrotime(&pi->last_refreshed); } static void cxgbe_tick(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; PORT_LOCK_ASSERT_OWNED(pi); cxgbe_refresh_stats(sc, pi); callout_schedule(&pi->tick, hz); } void vi_tick(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; vi_refresh_stats(sc, vi); callout_schedule(&vi->tick, hz); } static void cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid) { struct ifnet *vlan; if (arg != ifp || ifp->if_type != IFT_ETHER) return; vlan = VLAN_DEVAT(ifp, vid); VLAN_SETCOOKIE(vlan, ifp); } /* * Should match fw_caps_config_ enums in t4fw_interface.h */ static char *caps_decoder[] = { "\20\001IPMI\002NCSI", /* 0: NBM */ "\20\001PPP\002QFC\003DCBX", /* 1: link */ "\20\001INGRESS\002EGRESS", /* 2: switch */ "\20\001NIC\002VM\003IDS\004UM\005UM_ISGL" /* 3: NIC */ "\006HASHFILTER\007ETHOFLD", "\20\001TOE", /* 4: TOE */ "\20\001RDDP\002RDMAC", /* 5: RDMA */ "\20\001INITIATOR_PDU\002TARGET_PDU" /* 6: iSCSI */ "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD" "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD" "\007T10DIF" "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD", "\20\001LOOKASIDE\002TLSKEYS", /* 7: Crypto */ "\20\001INITIATOR\002TARGET\003CTRL_OFLD" /* 8: FCoE */ "\004PO_INITIATOR\005PO_TARGET", }; void t4_sysctls(struct adapter *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *c0; static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"}; ctx = device_get_sysctl_ctx(sc->dev); /* * dev.t4nex.X. 
*/ oid = device_get_sysctl_tree(sc->dev); c0 = children = SYSCTL_CHILDREN(oid); sc->sc_do_rxcopy = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW, &sc->sc_do_rxcopy, 1, "Do RX copy of small frames"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL, sc->params.nports, "# of ports"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells", CTLTYPE_STRING | CTLFLAG_RD, doorbells, (uintptr_t)&sc->doorbells, sysctl_bitfield_8b, "A", "available doorbells"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL, sc->params.vpd.cclk, "core clock frequency (in KHz)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val, sizeof(sc->params.sge.timer_val), sysctl_int_array, "A", "interrupt holdoff timer values (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val, sizeof(sc->params.sge.counter_val), sysctl_int_array, "A", "interrupt holdoff packet counter values"); t4_sge_sysctls(sc, ctx, children); sc->lro_timeout = 100; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW, &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW, &sc->debug_flags, 0, "flags to enable runtime debugging"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version", CTLFLAG_RD, sc->tp_version, 0, "TP microcode version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, 0, "firmware version"); if (sc->flags & IS_VF) return; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, NULL, chip_rev(sc), "chip hardware revision"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn", CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn", CTLFLAG_RD, sc->params.vpd.pn, 0, "part number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec", CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version", CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na", CTLFLAG_RD, sc->params.vpd.na, 0, "network address"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD, sc->er_version, 0, "expansion ROM version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD, sc->bs_version, 0, "bootstrap firmware version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD, NULL, sc->params.scfg_vers, "serial config version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD, NULL, sc->params.vpd_vers, "VPD version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf", CTLFLAG_RD, sc->cfg_file, 0, "configuration file"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL, sc->cfcsum, "config file checksum"); #define SYSCTL_CAP(name, n, text) \ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \ CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], (uintptr_t)&sc->name, \ sysctl_bitfield_16b, "A", "available " text " capabilities") SYSCTL_CAP(nbmcaps, 0, "NBM"); SYSCTL_CAP(linkcaps, 1, "link"); SYSCTL_CAP(switchcaps, 2, "switch"); SYSCTL_CAP(niccaps, 3, "NIC"); SYSCTL_CAP(toecaps, 4, "TCP offload"); SYSCTL_CAP(rdmacaps, 5, "RDMA"); SYSCTL_CAP(iscsicaps, 6, "iSCSI"); SYSCTL_CAP(cryptocaps, 7, "crypto"); SYSCTL_CAP(fcoecaps, 8, "FCoE"); #undef SYSCTL_CAP SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD, NULL, 
sc->tids.nftids, "number of filters"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, sc, 0, sysctl_temperature, "I", "chip temperature (in Celsius)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "loadavg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_loadavg, "A", "microprocessor load averages (debug firmwares only)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_vdd", CTLFLAG_RD, &sc->params.core_vdd, 0, "core Vdd (in mV)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "local_cpus", CTLTYPE_STRING | CTLFLAG_RD, sc, LOCAL_CPUS, sysctl_cpus, "A", "local CPUs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_cpus", CTLTYPE_STRING | CTLFLAG_RD, sc, INTR_CPUS, sysctl_cpus, "A", "preferred CPUs for interrupts"); /* * dev.t4nex.X.misc. Marked CTLFLAG_SKIP to avoid information overload. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "logs and miscellaneous information"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cctrl, "A", "congestion control"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0", CTLTYPE_STRING | CTLFLAG_RD, sc, 3, sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1", CTLTYPE_STRING | CTLFLAG_RD, sc, 4, sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5, sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? 
sysctl_cim_la : sysctl_cim_la_t6, "A", "CIM logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ma_la, "A", "CIM MA logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2", CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3", CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge", CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)"); if (chip_id(sc) > CHELSIO_T4) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_pif_la, "A", "CIM PIF logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_qcfg, "A", "CIM queue configuration"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cpl_stats, "A", "CPL statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ddp_stats, "A", "non-TCP DDP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_devlog, "A", "firmware's device log"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_fcoe_stats, "A", "FCoE statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_hw_sched, "A", "hardware scheduler "); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_l2t, "A", "hardware L2 table"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "smt", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_smt, "A", "hardware source MAC table"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_lb_stats, "A", "loopback statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_meminfo, "A", "memory regions"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? 
sysctl_mps_tcam : sysctl_mps_tcam_t6, "A", "MPS TCAM entries"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_path_mtus, "A", "path MTUs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_pm_stats, "A", "PM statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_rdma_stats, "A", "RDMA statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tcp_stats, "A", "TCP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tids, "A", "TID information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_err_stats, "A", "TP error statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask", CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I", "TP logic analyzer event capture mask"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_la, "A", "TP logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tx_rate, "A", "Tx rate"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ulprx_la, "A", "ULPRX logic analyzer"); if (chip_id(sc) >= CHELSIO_T5) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_wcwr_stats, "A", "write combined work requests"); } #ifdef TCP_OFFLOAD if (is_offload(sc)) { int i; char s[4]; /* * dev.t4nex.X.toe. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD, NULL, "TOE parameters"); children = SYSCTL_CHILDREN(oid); sc->tt.cong_algorithm = -1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm", CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control " "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, " "3 = highspeed)"); sc->tt.sndbuf = 256 * 1024; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW, &sc->tt.sndbuf, 0, "max hardware send buffer size"); sc->tt.ddp = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW, &sc->tt.ddp, 0, "DDP allowed"); sc->tt.rx_coalesce = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce", CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing"); sc->tt.tls = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tls", CTLFLAG_RW, &sc->tt.tls, 0, "Inline TLS allowed"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports", CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tls_rx_ports, "I", "TCP ports that use inline TLS+TOE RX"); sc->tt.tx_align = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align", CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload"); sc->tt.tx_zcopy = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy", CTLFLAG_RW, &sc->tt.tx_zcopy, 0, "Enable zero-copy aio_write(2)"); sc->tt.cop_managed_offloading = !!t4_cop_managed_offloading; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cop_managed_offloading", CTLFLAG_RW, &sc->tt.cop_managed_offloading, 0, "COP (Connection Offload Policy) controls all TOE offload"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A", "TP timer tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A", "TCP timestamp tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A", 
"DACK tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer", CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer, "IU", "DACK timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN, sysctl_tp_timer, "LU", "Minimum retransmit interval (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX, sysctl_tp_timer, "LU", "Maximum retransmit interval (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN, sysctl_tp_timer, "LU", "Persist timer min (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX, sysctl_tp_timer, "LU", "Persist timer max (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE, sysctl_tp_timer, "LU", "Keepalive idle timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL, sysctl_tp_timer, "LU", "Keepalive interval timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT, sysctl_tp_timer, "LU", "Initial SRTT (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER, sysctl_tp_timer, "LU", "FINWAIT2 timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_SYNSHIFTMAX, sysctl_tp_shift_cnt, "IU", "Number of SYN retransmissions before abort"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_RXTSHIFTMAXR2, sysctl_tp_shift_cnt, "IU", "Number of retransmissions before abort"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_KEEPALIVEMAXR2, sysctl_tp_shift_cnt, "IU", "Number of keepalive probes before abort"); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff", CTLFLAG_RD, NULL, "TOE retransmit backoffs"); children = SYSCTL_CHILDREN(oid); for (i = 0; i < 16; i++) { snprintf(s, sizeof(s), "%u", i); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s, CTLTYPE_UINT | CTLFLAG_RD, sc, i, sysctl_tp_backoff, "IU", "TOE retransmit backoff"); } } #endif } void vi_sysctls(struct vi_info *vi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(vi->dev); /* * dev.v?(cxgbe|cxl).X. 
*/ oid = device_get_sysctl_tree(vi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL, vi->viid, "VI identifier"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD, &vi->nrxq, 0, "# of rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD, &vi->ntxq, 0, "# of tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD, &vi->first_rxq, 0, "index of first rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD, &vi->first_txq, 0, "index of first tx queue"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL, vi->rss_size, "size of RSS indirection table"); if (IS_MAIN_VI(vi)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU", "Reserve queue 0 for non-flowid packets"); } #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD, &vi->nofldrxq, 0, "# of rx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD, &vi->nofldtxq, 0, "# of tx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq", CTLFLAG_RD, &vi->first_ofld_rxq, 0, "index of first TOE rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq", CTLFLAG_RD, &vi->first_ofld_txq, 0, "index of first TOE tx queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx_ofld", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx_ofld, "I", "holdoff timer index for TOE queues"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx_ofld", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx_ofld, "I", "holdoff packet counter index for TOE queues"); } #endif #ifdef DEV_NETMAP if (vi->nnmrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD, &vi->nnmrxq, 0, "# of netmap rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD, &vi->nnmtxq, 0, "# of netmap tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq", CTLFLAG_RD, &vi->first_nm_rxq, 0, "index of first netmap rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq", CTLFLAG_RD, &vi->first_nm_txq, 0, "index of first netmap tx queue"); } #endif SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I", "holdoff timer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I", "rx queue size"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I", "tx queue size"); } static void cxgbe_sysctls(struct port_info *pi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *children2; struct adapter *sc = pi->adapter; int i; char name[16]; static char *tc_flags = {"\20\1USER\2SYNC\3ASYNC\4ERR"}; ctx = device_get_sysctl_ctx(pi->dev); /* * dev.cxgbe.X.
*/ oid = device_get_sysctl_tree(pi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING | CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down"); if (pi->port_type == FW_PORT_TYPE_BT_XAUI) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I", "PHY temperature (in Celsius)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version", CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I", "PHY firmware version"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings", CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_pause_settings, "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec", CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_fec, "A", "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_autoneg, "I", "autonegotiation (-1 = not supported)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL, port_top_speed(pi), "max speed (in Gbps)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL, pi->mps_bg_map, "MPS buffer group map"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD, NULL, pi->rx_e_chan_map, "TP rx e-channel map"); if (sc->flags & IS_VF) return; /* * dev.(cxgbe|cxl).X.tc. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL, "Tx scheduler traffic classes (cl_rl)"); children2 = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "pktsize", CTLFLAG_RW, &pi->sched_params->pktsize, 0, "pktsize for per-flow cl-rl (0 means up to the driver)"); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "burstsize", CTLFLAG_RW, &pi->sched_params->burstsize, 0, "burstsize for per-flow cl-rl (0 means up to the driver)"); for (i = 0; i < sc->chip_params->nsched_cls; i++) { struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i]; snprintf(name, sizeof(name), "%d", i); children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, "traffic class")); SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "flags", CTLTYPE_STRING | CTLFLAG_RD, tc_flags, (uintptr_t)&tc->flags, sysctl_bitfield_8b, "A", "flags"); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount", CTLFLAG_RD, &tc->refcount, 0, "references to this class"); SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params", CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i, sysctl_tc_params, "A", "traffic class parameters"); } /* * dev.cxgbe.X.stats.
*/ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD, NULL, "port statistics"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD, &pi->tx_parse_error, 0, "# of tx packets with invalid length or # of segments"); #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \ SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \ CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \ sysctl_handle_t4_reg64, "QU", desc) SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_64", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L)); SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L)); SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L)); 
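/*
 * Every SYSCTL_ADD_T4_REG64() entry above binds its sysctl node directly
 * to a 64-bit MPS port-statistics register: the _L macro names the low
 * half of an adjacent low/high register pair, and sysctl_handle_t4_reg64
 * reads the full 64 bits on demand.  From userland the counters then show
 * up under the port's stats tree, e.g. (port 0 chosen only for
 * illustration, and the count shown is a placeholder):
 *
 *	# sysctl dev.cxgbe.0.stats.tx_octets
 *	dev.cxgbe.0.stats.tx_octets: <count>
 */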
SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err", "# of frames received with bad FCS", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_len_err", "# of frames received with length error", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_64", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L)); #undef SYSCTL_ADD_T4_REG64 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \ &pi->stats.name, desc) /* We get these from port_stats and 
they may be stale by up to 1s */
	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
	    "# drops due to buffer-group 0 overflows");
	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
	    "# drops due to buffer-group 1 overflows");
	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
	    "# drops due to buffer-group 2 overflows");
	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
	    "# drops due to buffer-group 3 overflows");
	SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
	    "# of buffer-group 0 truncated packets");
	SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
	    "# of buffer-group 1 truncated packets");
	SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
	    "# of buffer-group 2 truncated packets");
	SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
	    "# of buffer-group 3 truncated packets");
#undef SYSCTL_ADD_T4_PORTSTAT

	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_records",
	    CTLFLAG_RD, &pi->tx_tls_records,
	    "# of TLS records transmitted");
	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_octets",
	    CTLFLAG_RD, &pi->tx_tls_octets,
	    "# of payload octets in transmitted TLS records");
	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_records",
	    CTLFLAG_RD, &pi->rx_tls_records,
	    "# of TLS records received");
	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_octets",
	    CTLFLAG_RD, &pi->rx_tls_octets,
	    "# of payload octets in received TLS records");
}

static int
sysctl_int_array(SYSCTL_HANDLER_ARGS)
{
	int rc, *i, space = 0;
	struct sbuf sb;

	sbuf_new_for_sysctl(&sb, NULL, 64, req);
	for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
		if (space)
			sbuf_printf(&sb, " ");
		sbuf_printf(&sb, "%d", *i);
		space = 1;
	}
	rc = sbuf_finish(&sb);
	sbuf_delete(&sb);
	return (rc);
}

static int
sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS)
{
	int rc;
	struct sbuf *sb;

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
	if (sb == NULL)
		return (ENOMEM);

	sbuf_printf(sb, "%b", *(uint8_t *)(uintptr_t)arg2, (char *)arg1);
	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}

static int
sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS)
{
	int rc;
	struct sbuf *sb;

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
	if (sb == NULL)
		return (ENOMEM);

	sbuf_printf(sb, "%b", *(uint16_t *)(uintptr_t)arg2, (char *)arg1);
	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}

static int
sysctl_btphy(SYSCTL_HANDLER_ARGS)
{
	struct port_info *pi = arg1;
	int op = arg2;
	struct adapter *sc = pi->adapter;
	u_int v;
	int rc;

	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
	if (rc)
		return (rc);
	/* XXX: magic numbers */
	rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ?
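
/*
 * Illustrative aside: the two bitfield handlers above lean on the kernel's
 * %b conversion.  This self-contained userspace mirror (a sketch, not the
 * kernel implementation) shows how a format string such as
 * "\20\1PAUSE_RX\2PAUSE_TX" is interpreted: the first byte is the output
 * base (\20 == 16), then each bit is a 1-based bit number followed by its
 * name.
 */
#include <stdio.h>

static void
print_b(unsigned int val, const char *fmt)
{
	int any = 0;

	printf(*fmt++ == 16 ? "%#x" : "%o", val);
	while (*fmt) {
		int bit = *fmt++;
		const char *name = fmt;

		while (*fmt > 32)	/* a name runs until the next bit number */
			fmt++;
		if (val & (1u << (bit - 1))) {
			printf("%c%.*s", any ? ',' : '<',
			    (int)(fmt - name), name);
			any = 1;
		}
	}
	if (any)
		printf(">");
}

int
main(void)
{
	/* PAUSE_RX is bit 1 and PAUSE_TX is bit 2, as in sysctl_pause_settings. */
	print_b(3, "\20\1PAUSE_RX\2PAUSE_TX");	/* 0x3<PAUSE_RX,PAUSE_TX> */
	printf("\n");
	return (0);
}
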
0x20 : 0xc820, &v); end_synchronized_op(sc, 0); if (rc) return (rc); if (op == 0) v /= 256; rc = sysctl_handle_int(oidp, &v, 0, req); return (rc); } static int sysctl_noflowq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; int rc, val; val = vi->rsrv_noflowq; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if ((val >= 1) && (vi->ntxq > 1)) vi->rsrv_noflowq = 1; else vi->rsrv_noflowq = 0; return (rc); } static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_rxq *rxq; uint8_t v; idx = vi->tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4tmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1); for_each_rxq(vi, i, rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&rxq->iq.intr_params, v); #else rxq->iq.intr_params = v; #endif } vi->tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = vi->pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4pktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_rxq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || (qsize & 7)) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4rxqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_rxq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_txq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || qsize > 65536) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4txqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_txq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1PAUSE_RX\2PAUSE_TX"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX)); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] 
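
/*
 * Illustrative sketch only: the knob handlers above (noflowq, the holdoff
 * indices, the queue sizes) all share one read-validate-commit shape.  The
 * handler name, the "t4xmpl" wait string, and the 0..1 range below are
 * hypothetical; the calls are the driver's existing API.
 */
static int
sysctl_example_knob(SYSCTL_HANDLER_ARGS)
{
	struct vi_info *vi = arg1;
	struct adapter *sc = vi->pi->adapter;
	int rc, val;

	val = vi->rsrv_noflowq;			/* 1. snapshot the current value */
	rc = sysctl_handle_int(oidp, &val, 0, req);
	if (rc != 0 || req->newptr == NULL)	/* 2. plain read: we are done */
		return (rc);
	if (val < 0 || val > 1)			/* 3. validate before touching state */
		return (EINVAL);
	rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4xmpl");
	if (rc)
		return (rc);
	vi->rsrv_noflowq = val;			/* 4. commit under the synchronized op */
	end_synchronized_op(sc, 0);
	return (0);
}
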
!= 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~(PAUSE_TX | PAUSE_RX)) return (EINVAL); /* some other bit is set too */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4PAUSE"); if (rc) return (rc); PORT_LOCK(pi); if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) { lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX); lc->requested_fc |= n; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc == 0) { lc->fc = lc->requested_fc; set_current_media(pi, &pi->media); } } PORT_UNLOCK(pi); end_synchronized_op(sc, 0); } return (rc); } static int sysctl_fec(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1RS\2BASER_RS\3RESERVED"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fec & M_FW_PORT_CAP_FEC, bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fec & M_FW_PORT_CAP_FEC); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] != 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~M_FW_PORT_CAP_FEC) return (EINVAL); /* some other bit is set too */ if (!powerof2(n)) return (EINVAL); /* one bit can be set at most */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4fec"); if (rc) return (rc); PORT_LOCK(pi); if ((lc->requested_fec & M_FW_PORT_CAP_FEC) != n) { lc->requested_fec = n & G_FW_PORT_CAP_FEC(lc->supported); rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc == 0) { lc->fec = lc->requested_fec; } } PORT_UNLOCK(pi); end_synchronized_op(sc, 0); } return (rc); } static int sysctl_autoneg(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc, val, old; if (lc->supported & FW_PORT_CAP_ANEG) val = lc->requested_aneg == AUTONEG_ENABLE ? 1 : 0; else val = -1; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (val == 0) val = AUTONEG_DISABLE; else if (val == 1) val = AUTONEG_ENABLE; else return (EINVAL); rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4aneg"); if (rc) return (rc); PORT_LOCK(pi); if ((lc->supported & FW_PORT_CAP_ANEG) == 0) { rc = ENOTSUP; goto done; } if (lc->requested_aneg == val) { rc = 0; /* no change, do nothing. 
 */
		goto done;
	}
	old = lc->requested_aneg;
	lc->requested_aneg = val;
	rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
	if (rc != 0)
		lc->requested_aneg = old;
	else
		set_current_media(pi, &pi->media);
done:
	PORT_UNLOCK(pi);
	end_synchronized_op(sc, 0);
	return (rc);
}

static int
sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	int reg = arg2;
	uint64_t val;

	val = t4_read_reg64(sc, reg);

	return (sysctl_handle_64(oidp, &val, 0, req));
}

static int
sysctl_temperature(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	int rc, t;
	uint32_t param, val;

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
	if (rc)
		return (rc);
	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
	end_synchronized_op(sc, 0);
	if (rc)
		return (rc);

	/* unknown is returned as 0 but we display -1 in that case */
	t = val == 0 ? -1 : val;

	rc = sysctl_handle_int(oidp, &t, 0, req);
	return (rc);
}

static int
sysctl_loadavg(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc;
	uint32_t param, val;

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4lavg");
	if (rc)
		return (rc);
	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_LOAD);
	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
	end_synchronized_op(sc, 0);
	if (rc)
		return (rc);

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	if (val == 0xffffffff) {
		/* Only debug and custom firmwares report load averages. */
		sbuf_printf(sb, "not available");
	} else {
		sbuf_printf(sb, "%d %d %d", val & 0xff, (val >> 8) & 0xff,
		    (val >> 16) & 0xff);
	}

	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}

static int
sysctl_cctrl(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc, i;
	uint16_t incr[NMTUS][NCCTRL_WIN];
	static const char *dec_fac[] = {
		"0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
		"0.9375"
	};

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	t4_read_cong_tbl(sc, incr);

	for (i = 0; i < NCCTRL_WIN; ++i) {
		sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
		    incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
		    incr[5][i], incr[6][i], incr[7][i]);
		sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
		    incr[8][i], incr[9][i], incr[10][i], incr[11][i],
		    incr[12][i], incr[13][i], incr[14][i], incr[15][i],
		    sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
	}

	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}

static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
	"TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",	/* ibq's */
	"ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI",	/* obq's */
	"SGE0-RX", "SGE1-RX"	/* additional obq's (T5 onwards) */
};

static int
sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc, i, n, qid = arg2;
	uint32_t *buf, *p;
	char *qtype;
	u_int cim_num_obq = sc->chip_params->cim_num_obq;

	KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
	    ("%s: bad qid %d\n", __func__, qid));

	if (qid < CIM_NUM_IBQ) {
		/* inbound queue */
		qtype = "IBQ";
		n = 4 * CIM_IBQ_SIZE;
		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
		rc = t4_read_cim_ibq(sc, qid, buf, n);
	} else {
		/* outbound queue */
		qtype = "OBQ";
		qid -= CIM_NUM_IBQ;
		n = 4 *
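
/*
 * Illustrative sketch: a firmware device-parameter query, as used by
 * sysctl_temperature and sysctl_loadavg above, is just a mnemonic/X/Y
 * selector packed into one word and handed to t4_query_params().  The VDD
 * selector below (FW_PARAM_DEV_DIAG_VDD) is an assumption taken from the
 * firmware interface headers; everything else mirrors the handlers above.
 */
	uint32_t param, val;
	int rc;

	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD);
	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
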
cim_num_obq * CIM_OBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_obq(sc, qid, buf, n); } if (rc < 0) { rc = -rc; goto done; } n = rc * sizeof(uint32_t); /* rc has # of words actually read */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]); for (i = 0, p = buf; i < n; i += 16, p += 4) sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1], p[2], p[3]); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) <= CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? "" : " LS0Stat LS0Addr LS0Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x", p[5] & 0xff, p[6], p[7]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x", (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8, p[4] & 0xff, p[5] >> 8); sbuf_printf(sb, "\n %02x %x%07x %x%07x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4); } else { sbuf_printf(sb, "\n %02x %x%07x %x%07x %08x %08x " "%08x%08x%08x%08x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5], p[6], p[7]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) > CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Inst Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? 
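
/*
 * Illustrative aside: the queue dump in sysctl_cim_ibq_obq above prints
 * four 32-bit words per line with a running byte offset.  A self-contained
 * userspace mirror of that loop (a sketch, not driver code):
 */
#include <stdint.h>
#include <stdio.h>

static void
dump_words(const uint32_t *buf, int n)	/* n is a byte count */
{
	const uint32_t *p;
	int i;

	for (i = 0, p = buf; i < n; i += 16, p += 4)
		printf("%#06x: %08x %08x %08x %08x\n",
		    i, p[0], p[1], p[2], p[3]);
}

int
main(void)
{
	uint32_t sample[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

	dump_words(sample, sizeof(sample));	/* prints two lines */
	return (0);
}
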
"" : " LS0Stat LS0Addr LS0Data LS1Stat LS1Addr LS1Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x %08x", p[3] & 0xff, p[2], p[1], p[0]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x %02x%06x", (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8, p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8); sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16); } else { sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x " "%08x %08x %08x %08x %08x %08x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16, p[2], p[1], p[0], p[5], p[4], p[3]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE); p = buf; for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCnt ID Tag UE Data RDY VLD"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%3u %2u %x %u %08x%08x %u %u", (p[2] >> 10) & 0xff, (p[2] >> 7) & 7, (p[2] >> 3) & 0xf, (p[2] >> 2) & 1, (p[1] >> 2) | ((p[2] & 3) << 30), (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1, p[0] & 1); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL); p = buf; sbuf_printf(sb, "Cntl ID DataBE Addr Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %04x %08x %08x%08x%08x%08x", (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff, p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCntl ID Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %08x%08x%08x%08x", (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t thres[CIM_NUM_IBQ]; uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr; uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat; u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq; cim_num_obq = sc->chip_params->cim_num_obq; if (is_t4(sc)) { ibq_rdaddr = A_UP_IBQ_0_RDADDR; obq_rdaddr = A_UP_OBQ_0_REALADDR; } else { ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR; obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR; } nq = CIM_NUM_IBQ + cim_num_obq; rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat); if (rc == 0) rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr); if (rc != 0) return (rc); t4_read_cimq_cfg(sc, base, size, 
thres); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, " Queue Base Size Thres RdPtr WrPtr SOP EOP Avail"); for (i = 0; i < CIM_NUM_IBQ; i++, p += 4) sbuf_printf(sb, "\n%7s %5x %5u %5u %6x %4x %4u %4u %5u", qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]), G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); for ( ; i < nq; i++, p += 4, wr += 2) sbuf_printf(sb, "\n%7s %5x %5u %12x %4x %4u %4u %5u", qname[i], base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff, wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_cpl_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_cpl_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\nCPL requests: %10u %10u %10u %10u", stats.req[0], stats.req[1], stats.req[2], stats.req[3]); sbuf_printf(sb, "\nCPL responses: %10u %10u %10u %10u", stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\nCPL requests: %10u %10u", stats.req[0], stats.req[1]); sbuf_printf(sb, "\nCPL responses: %10u %10u", stats.rsp[0], stats.rsp[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_usm_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_usm_stats(sc, &stats, 1); sbuf_printf(sb, "Frames: %u\n", stats.frames); sbuf_printf(sb, "Octets: %ju\n", stats.octets); sbuf_printf(sb, "Drops: %u", stats.drops); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char * const devlog_level_strings[] = { [FW_DEVLOG_LEVEL_EMERG] = "EMERG", [FW_DEVLOG_LEVEL_CRIT] = "CRIT", [FW_DEVLOG_LEVEL_ERR] = "ERR", [FW_DEVLOG_LEVEL_NOTICE] = "NOTICE", [FW_DEVLOG_LEVEL_INFO] = "INFO", [FW_DEVLOG_LEVEL_DEBUG] = "DEBUG" }; static const char * const devlog_facility_strings[] = { [FW_DEVLOG_FACILITY_CORE] = "CORE", [FW_DEVLOG_FACILITY_CF] = "CF", [FW_DEVLOG_FACILITY_SCHED] = "SCHED", [FW_DEVLOG_FACILITY_TIMER] = "TIMER", [FW_DEVLOG_FACILITY_RES] = "RES", [FW_DEVLOG_FACILITY_HW] = "HW", [FW_DEVLOG_FACILITY_FLR] = "FLR", [FW_DEVLOG_FACILITY_DMAQ] = "DMAQ", [FW_DEVLOG_FACILITY_PHY] = "PHY", [FW_DEVLOG_FACILITY_MAC] = "MAC", [FW_DEVLOG_FACILITY_PORT] = "PORT", [FW_DEVLOG_FACILITY_VI] = "VI", [FW_DEVLOG_FACILITY_FILTER] = "FILTER", [FW_DEVLOG_FACILITY_ACL] = "ACL", [FW_DEVLOG_FACILITY_TM] = "TM", [FW_DEVLOG_FACILITY_QFC] = "QFC", [FW_DEVLOG_FACILITY_DCB] = "DCB", [FW_DEVLOG_FACILITY_ETH] = "ETH", [FW_DEVLOG_FACILITY_OFLD] = "OFLD", [FW_DEVLOG_FACILITY_RI] = "RI", [FW_DEVLOG_FACILITY_ISCSI] = "ISCSI", [FW_DEVLOG_FACILITY_FCOE] = "FCOE", [FW_DEVLOG_FACILITY_FOISCSI] = "FOISCSI", [FW_DEVLOG_FACILITY_FOFCOE] = "FOFCOE", [FW_DEVLOG_FACILITY_CHNET] = "CHNET", }; static int sysctl_devlog(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e *buf, *e; int 
i, j, rc, nentries, first = 0; struct sbuf *sb; uint64_t ftstamp = UINT64_MAX; if (dparams->addr == 0) return (ENXIO); buf = malloc(dparams->size, M_CXGBE, M_NOWAIT); if (buf == NULL) return (ENOMEM); rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size); if (rc != 0) goto done; nentries = dparams->size / sizeof(struct fw_devlog_e); for (i = 0; i < nentries; i++) { e = &buf[i]; if (e->timestamp == 0) break; /* end */ e->timestamp = be64toh(e->timestamp); e->seqno = be32toh(e->seqno); for (j = 0; j < 8; j++) e->params[j] = be32toh(e->params[j]); if (e->timestamp < ftstamp) { ftstamp = e->timestamp; first = i; } } if (buf[first].timestamp == 0) goto done; /* nothing in the log */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%10s %15s %8s %8s %s\n", "Seq#", "Tstamp", "Level", "Facility", "Message"); i = first; do { e = &buf[i]; if (e->timestamp == 0) break; /* end */ sbuf_printf(sb, "%10d %15ju %8s %8s ", e->seqno, e->timestamp, (e->level < nitems(devlog_level_strings) ? devlog_level_strings[e->level] : "UNKNOWN"), (e->facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e->facility] : "UNKNOWN")); sbuf_printf(sb, e->fmt, e->params[0], e->params[1], e->params[2], e->params[3], e->params[4], e->params[5], e->params[6], e->params[7]); if (++i == nentries) i = 0; } while (i != first); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_fcoe_stats stats[MAX_NCHAN]; int i, nchan = sc->chip_params->nchan; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nchan; i++) t4_get_fcoe_stats(sc, i, &stats[i], 1); if (nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp, stats[2].octets_ddp, stats[3].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp, stats[2].frames_ddp, stats[3].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u", stats[0].frames_drop, stats[1].frames_drop, stats[2].frames_drop, stats[3].frames_drop); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u", stats[0].frames_drop, stats[1].frames_drop); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; unsigned int map, kbps, ipg, mode; unsigned int pace_tab[NTX_SCHED]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP); mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG)); t4_read_pace_tbl(sc, pace_tab); sbuf_printf(sb, "Scheduler Mode Channel Rate (Kbps) " "Class IPG (0.1 ns) Flow IPG (us)"); for (i = 0; i < NTX_SCHED; ++i, map >>= 2) { t4_get_tx_sched(sc, i, &kbps, &ipg, 1); sbuf_printf(sb, "\n %u %-5s %u ", i, (mode & (1 << i)) ? 
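
/*
 * Illustrative aside: sysctl_devlog above treats the firmware log as a
 * ring.  This self-contained mirror (a sketch, not driver code) shows the
 * same walk: pick the entry with the smallest timestamp as the oldest,
 * then print forward with wrap-around until back at the starting slot.
 */
#include <stdint.h>
#include <stdio.h>

struct entry {
	uint64_t timestamp;	/* 0 marks an unused slot, as in the driver */
	const char *msg;
};

static void
dump_ring(const struct entry *e, int nentries)
{
	uint64_t ftstamp = UINT64_MAX;
	int i, first = 0;

	for (i = 0; i < nentries; i++) {
		if (e[i].timestamp != 0 && e[i].timestamp < ftstamp) {
			ftstamp = e[i].timestamp;
			first = i;
		}
	}
	i = first;
	do {
		if (e[i].timestamp == 0)
			break;
		printf("%ju %s\n", (uintmax_t)e[i].timestamp, e[i].msg);
		if (++i == nentries)
			i = 0;		/* wrap around the ring */
	} while (i != first);
}

int
main(void)
{
	/* slot 0 holds the newest record, so output starts at slot 1 */
	struct entry log[3] = {
		{ 30, "newest" }, { 10, "oldest" }, { 20, "middle" }
	};

	dump_ring(log, 3);	/* oldest, middle, newest */
	return (0);
}
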
"flow" : "class", map & 3); if (kbps) sbuf_printf(sb, "%9u ", kbps); else sbuf_printf(sb, " disabled "); if (ipg) sbuf_printf(sb, "%13u ", ipg); else sbuf_printf(sb, " disabled "); if (pace_tab[i]) sbuf_printf(sb, "%10u", pace_tab[i]); else sbuf_printf(sb, " disabled"); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, j; uint64_t *p0, *p1; struct lb_port_stats s[2]; static const char *stat_name[] = { "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:", "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:", "Frames128To255:", "Frames256To511:", "Frames512To1023:", "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:", "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:", "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:", "BG2FramesTrunc:", "BG3FramesTrunc:" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); memset(s, 0, sizeof(s)); for (i = 0; i < sc->chip_params->nchan; i += 2) { t4_get_lb_stats(sc, i, &s[0]); t4_get_lb_stats(sc, i + 1, &s[1]); p0 = &s[0].octets; p1 = &s[1].octets; sbuf_printf(sb, "%s Loopback %u" " Loopback %u", i == 0 ? "" : "\n", i, i + 1); for (j = 0; j < nitems(stat_name); j++) sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j], *p0++, *p1++); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS) { int rc = 0; struct port_info *pi = arg1; struct link_config *lc = &pi->link_cfg; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 64, req); if (sb == NULL) return (ENOMEM); if (lc->link_ok || lc->link_down_rc == 255) sbuf_printf(sb, "n/a"); else sbuf_printf(sb, "%s", t4_link_down_rc_str(lc->link_down_rc)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } struct mem_desc { unsigned int base; unsigned int limit; unsigned int idx; }; static int mem_desc_cmp(const void *a, const void *b) { return ((const struct mem_desc *)a)->base - ((const struct mem_desc *)b)->base; } static void mem_region_show(struct sbuf *sb, const char *name, unsigned int from, unsigned int to) { unsigned int size; if (from == to) return; size = to - from + 1; if (size == 0) return; /* XXX: need humanize_number(3) in libkern for a more readable 'size' */ sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size); } static int sysctl_meminfo(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n; uint32_t lo, hi, used, alloc; static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"}; static const char *region[] = { "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:", "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:", "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:", "TDDP region:", "TPT region:", "STAG region:", "RQ region:", "RQUDP region:", "PBL region:", "TXPBL region:", "DBVFIFO region:", "ULPRX state:", "ULPTX state:", "On-chip queues:", "TLS keys:", }; struct mem_desc avail[4]; struct mem_desc mem[nitems(region) + 3]; /* up to 3 holes */ struct mem_desc *md = mem; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nitems(mem); i++) { mem[i].limit = 0; mem[i].idx = i; } /* Find and sort the populated memory ranges */ i = 0; lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); if 
(lo & F_EDRAM0_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM0_BAR); avail[i].base = G_EDRAM0_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20); avail[i].idx = 0; i++; } if (lo & F_EDRAM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM1_BAR); avail[i].base = G_EDRAM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20); avail[i].idx = 1; i++; } if (lo & F_EXT_MEM_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); avail[i].base = G_EXT_MEM_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20); avail[i].idx = is_t5(sc) ? 3 : 2; /* Call it MC0 for T5 */ i++; } if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); avail[i].base = G_EXT_MEM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM1_SIZE(hi) << 20); avail[i].idx = 4; i++; } if (!i) /* no memory available */ return 0; qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp); (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR); (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE); /* the next few have explicit upper bounds */ md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) * G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE)); md++; md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) * G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE)); md++; if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { if (chip_id(sc) <= CHELSIO_T5) md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE); else md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR); md->limit = 0; } else { md->base = 0; md->idx = nitems(region); /* hide it */ } md++; #define ulp_region(reg) \ md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\ (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT) ulp_region(RX_ISCSI); ulp_region(RX_TDDP); ulp_region(TX_TPT); ulp_region(RX_STAG); ulp_region(RX_RQ); ulp_region(RX_RQUDP); ulp_region(RX_PBL); ulp_region(TX_PBL); #undef ulp_region md->base = 0; md->idx = nitems(region); if (!is_t4(sc)) { uint32_t size = 0; uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2); uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE); if (is_t5(sc)) { if (sge_ctrl & F_VFIFO_ENABLE) size = G_DBVFIFO_SIZE(fifo_size); } else size = G_T6_DBVFIFO_SIZE(fifo_size); if (size) { md->base = G_BASEADDR(t4_read_reg(sc, A_SGE_DBVFIFO_BADDR)); md->limit = md->base + (size << 2) - 1; } } md++; md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE); md->limit = 0; md++; md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE); md->limit = 0; md++; md->base = sc->vres.ocq.start; if (sc->vres.ocq.size) md->limit = md->base + sc->vres.ocq.size - 1; else md->idx = nitems(region); /* hide it */ md++; md->base = sc->vres.key.start; if (sc->vres.key.size) md->limit = md->base + sc->vres.key.size - 1; else md->idx = nitems(region); /* hide it */ md++; /* add any address-space holes, there can be up to 3 */ for (n = 0; n < i - 1; n++) if (avail[n].limit < avail[n + 1].base) (md++)->base = avail[n].limit; if (avail[n].limit) (md++)->base = avail[n].limit; n = md - mem; qsort(mem, n, 
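
/*
 * Illustrative aside: the hole accounting in sysctl_meminfo above works by
 * sorting the populated ranges by base address and flagging any gap
 * between one range's limit and the next range's base.  A self-contained
 * mirror (the struct and sample addresses are made up for the demo):
 */
#include <stdio.h>
#include <stdlib.h>

struct range {
	unsigned int base;
	unsigned int limit;	/* first address past the range */
};

static int
range_cmp(const void *a, const void *b)
{
	return ((const struct range *)a)->base -
	    ((const struct range *)b)->base;
}

int
main(void)
{
	struct range avail[3] = {
		{ 0x40000, 0x80000 }, { 0x00000, 0x20000 }, { 0x20000, 0x30000 }
	};
	int i, n = 3;

	qsort(avail, n, sizeof(struct range), range_cmp);
	for (i = 0; i < n - 1; i++)
		if (avail[i].limit < avail[i + 1].base)	/* an address-space hole */
			printf("hole: %#x-%#x\n",
			    avail[i].limit, avail[i + 1].base - 1);
	return (0);	/* prints "hole: 0x30000-0x3ffff" */
}
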
sizeof(struct mem_desc), mem_desc_cmp); for (lo = 0; lo < i; lo++) mem_region_show(sb, memory[avail[lo].idx], avail[lo].base, avail[lo].limit - 1); sbuf_printf(sb, "\n"); for (i = 0; i < n; i++) { if (mem[i].idx >= nitems(region)) continue; /* skip holes */ if (!mem[i].limit) mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0; mem_region_show(sb, region[mem[i].idx], mem[i].base, mem[i].limit); } sbuf_printf(sb, "\n"); lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP RAM:", lo, hi); lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP Extmem2:", lo, hi); lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE); sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n", G_PMRXMAXPAGE(lo), t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10, (lo & F_PMRXNUMCHN) ? 2 : 1); lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE); hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE); sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n", G_PMTXMAXPAGE(lo), hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo)); sbuf_printf(sb, "%u p-structs\n", t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT)); for (i = 0; i < 4; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated", i, used, alloc); } for (i = 0; i < sc->chip_params->nchan; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nLoopback %d using %u pages out of %u allocated", i, used, alloc); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static inline void tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask) { *mask = x | y; y = htobe64(y); memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN); } static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) <= CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask Vld Ports PF" " VF Replication P0 P1 P2 P3 ML"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint64_t tcamx, tcamy, mask; uint32_t cls_lo, cls_hi; uint8_t addr[ETHER_ADDR_LEN]; tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i)); tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i)); if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx" " %c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, (cls_lo & F_SRAM_VLD) ? 'Y' : 'N', G_PORTMAP(cls_hi), G_PF(cls_lo), (cls_lo & F_VF_VALID) ? 
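
/*
 * Illustrative aside on the X/Y ternary-CAM encoding decoded by
 * tcamxy2valmask() above: a bit is compared when either X or Y is set
 * (hence mask = x | y), Y carries the expected value of the compared
 * bits, and X & Y != 0 marks an invalid entry, which the loop above
 * skips.  A self-contained sketch of that reading:
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t x = 0x0a, y = 0xf5;	/* example entry with disjoint X and Y */

	if (x & y) {
		printf("invalid entry\n");	/* the sysctl skips these */
		return (0);
	}
	printf("mask  %#jx\n", (uintmax_t)(x | y));	/* 0xff: all 8 bits compared */
	printf("value %#jx\n", (uintmax_t)y);		/* 0xf5: expected bit values */
	return (0);
}
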
G_VF(cls_lo) : -1); if (cls_lo & F_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%36d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%36s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo), G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo), G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) > CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask VNI Mask" " IVLAN Vld DIP_Hit Lookup Port Vld Ports PF VF" " Replication" " P0 P1 P2 P3 ML\n"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint8_t dip_hit, vlan_vld, lookup_type, port_num; uint16_t ivlan; uint64_t tcamx, tcamy, val, mask; uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy; uint8_t addr[ETHER_ADDR_LEN]; ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0); if (i < 256) ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0); else ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamy = G_DMACH(val) << 32; tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); lookup_type = G_DATALKPTYPE(data2); port_num = G_DATAPORTNUM(data2); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI */ vniy = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); dip_hit = data2 & F_DATADIPHIT; vlan_vld = 0; } else { vniy = 0; dip_hit = 0; vlan_vld = data2 & F_DATAVIDH2; ivlan = G_VIDL(val); } ctl |= V_CTLXYBITSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamx = G_DMACH(val) << 32; tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI mask */ vnix = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); } else vnix = 0; if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); if (lookup_type && lookup_type != M_DATALKPTYPE) { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx %06x %06x - - %3c" " 'I' %4x %3c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N', port_num, cls_lo & F_T6_SRAM_VLD ? 
'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } else { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx - - ", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask); if (vlan_vld) sbuf_printf(sb, "%4u Y ", ivlan); else sbuf_printf(sb, " - N "); sbuf_printf(sb, "- %3c %4x %3c %#x%4u%4d", lookup_type ? 'I' : 'O', port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } if (cls_lo & F_T6_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t6mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%72d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x" " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc255_224), be32toh(ldst_cmd.u.mps.rplc.rplc223_192), be32toh(ldst_cmd.u.mps.rplc.rplc191_160), be32toh(ldst_cmd.u.mps.rplc.rplc159_128), be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%72s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#x", G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo), G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo), (cls_lo >> S_T6_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; uint16_t mtus[NMTUS]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_read_mtu_tbl(sc, mtus, NULL); sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u", mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6], mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13], mtus[14], mtus[15]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS]; uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS]; static const char *tx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Bypass + mem:", "Tx FIFO wait", NULL, "Tx latency" }; static const char *rx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Flush:", "Rx FIFO wait", NULL, "Rx latency" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_pmtx_get_stats(sc, tx_cnt, tx_cyc); t4_pmrx_get_stats(sc, rx_cnt, rx_cyc); sbuf_printf(sb, " Tx pcmds Tx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); } sbuf_printf(sb, "\n Rx pcmds Rx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } if (chip_id(sc) > CHELSIO_T5) { sbuf_printf(sb, "\n Total wait Total occupancy"); sbuf_printf(sb, "\n%-13s %10u 
%20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); i += 2; MPASS(i < nitems(tx_stats)); sbuf_printf(sb, "\n Reads Total wait"); sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_rdma_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_rdma_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod); sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_tcp_stats v4, v6; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_tcp_stats(sc, &v4, &v6, 0); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, " IP IPv6\n"); sbuf_printf(sb, "OutRsts: %20u %20u\n", v4.tcp_out_rsts, v6.tcp_out_rsts); sbuf_printf(sb, "InSegs: %20ju %20ju\n", v4.tcp_in_segs, v6.tcp_in_segs); sbuf_printf(sb, "OutSegs: %20ju %20ju\n", v4.tcp_out_segs, v6.tcp_out_segs); sbuf_printf(sb, "RetransSegs: %20ju %20ju", v4.tcp_retrans_segs, v6.tcp_retrans_segs); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tids(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tid_info *t = &sc->tids; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); if (t->natids) { sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1, t->atids_in_use); } if (t->nhpftids) { sbuf_printf(sb, "HPFTID range: %u-%u, in use: %u\n", t->hpftid_base, t->hpftid_end, t->hpftids_in_use); } if (t->ntids) { sbuf_printf(sb, "TID range: "); if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { uint32_t b, hb; if (chip_id(sc) <= CHELSIO_T5) { b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4; hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4; } else { b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX); hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE); } if (b) sbuf_printf(sb, "%u-%u, ", t->tid_base, b - 1); sbuf_printf(sb, "%u-%u", hb, t->ntids - 1); } else sbuf_printf(sb, "%u-%u", t->tid_base, t->ntids - 1); sbuf_printf(sb, ", in use: %u\n", atomic_load_acq_int(&t->tids_in_use)); } if (t->nstids) { sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base, t->stid_base + t->nstids - 1, t->stids_in_use); } if (t->nftids) { sbuf_printf(sb, "FTID range: %u-%u, in use: %u\n", t->ftid_base, t->ftid_end, t->ftids_in_use); } if (t->netids) { sbuf_printf(sb, "ETID range: %u-%u, in use: %u\n", t->etid_base, t->etid_base + t->netids - 1, t->etids_in_use); } sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users", t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4), t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_err_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return 
(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_err_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "macInErrs: %10u %10u %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1], stats.mac_in_errs[2], stats.mac_in_errs[3]); sbuf_printf(sb, "hdrInErrs: %10u %10u %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1], stats.hdr_in_errs[2], stats.hdr_in_errs[3]); sbuf_printf(sb, "tcpInErrs: %10u %10u %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1], stats.tcp_in_errs[2], stats.tcp_in_errs[3]); sbuf_printf(sb, "tcp6InErrs: %10u %10u %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1], stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]); sbuf_printf(sb, "tnlCongDrops: %10u %10u %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1], stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]); sbuf_printf(sb, "tnlTxDrops: %10u %10u %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1], stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1], stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]); sbuf_printf(sb, "ofldChanDrops: %10u %10u %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1], stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "macInErrs: %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1]); sbuf_printf(sb, "hdrInErrs: %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1]); sbuf_printf(sb, "tcpInErrs: %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1]); sbuf_printf(sb, "tcp6InErrs: %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]); sbuf_printf(sb, "tnlCongDrops: %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]); sbuf_printf(sb, "tnlTxDrops: %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]); sbuf_printf(sb, "ofldChanDrops: %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]); } sbuf_printf(sb, "ofldNoNeigh: %u\nofldCongDefer: %u", stats.ofld_no_neigh, stats.ofld_cong_defer); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tp_params *tpp = &sc->params.tp; u_int mask; int rc; mask = tpp->la_mask >> 16; rc = sysctl_handle_int(oidp, &mask, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (mask > 0xffff) return (EINVAL); tpp->la_mask = mask << 16; t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask); return (0); } struct field_desc { const char *name; u_int start; u_int width; }; static void field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f) { char buf[32]; int line_size = 0; while (f->name) { uint64_t mask = (1ULL << f->width) - 1; int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name, ((uintmax_t)v >> f->start) & mask); if (line_size + len >= 79) { line_size = 8; sbuf_printf(sb, "\n "); } sbuf_printf(sb, "%s ", buf); line_size += len + 1; f++; } sbuf_printf(sb, "\n"); } static const struct field_desc tp_la0[] = { { "RcfOpCodeOut", 60, 4 }, { "State", 56, 4 }, { "WcfState", 52, 4 }, { "RcfOpcSrcOut", 50, 2 }, { "CRxError", 49, 1 }, { "ERxError", 48, 1 }, { "SanityFailed", 47, 1 }, { "SpuriousMsg", 46, 1 }, { 
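
/*
 * Illustrative aside: field_desc_show() above decodes each named field of
 * a 64-bit logic-analyzer word with a shift and a width-derived mask.  A
 * self-contained mirror of that extraction, using two fields from tp_la0:
 */
#include <stdint.h>
#include <stdio.h>

struct field {
	const char *name;
	unsigned int start;
	unsigned int width;
};

static const struct field demo[] = {
	{ "State", 56, 4 }, { "Tid", 32, 10 }, { NULL, 0, 0 }
};

int
main(void)
{
	uint64_t v = (uint64_t)0x9 << 56 | (uint64_t)0x155 << 32;
	const struct field *f;

	for (f = demo; f->name != NULL; f++) {
		uint64_t mask = (1ULL << f->width) - 1;

		printf("%s: %ju\n", f->name, (uintmax_t)((v >> f->start) & mask));
	}
	return (0);	/* prints "State: 9" and "Tid: 341" */
}
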
"FlushInputMsg", 45, 1 }, { "FlushInputCpl", 44, 1 }, { "RssUpBit", 43, 1 }, { "RssFilterHit", 42, 1 }, { "Tid", 32, 10 }, { "InitTcb", 31, 1 }, { "LineNumber", 24, 7 }, { "Emsg", 23, 1 }, { "EdataOut", 22, 1 }, { "Cmsg", 21, 1 }, { "CdataOut", 20, 1 }, { "EreadPdu", 19, 1 }, { "CreadPdu", 18, 1 }, { "TunnelPkt", 17, 1 }, { "RcfPeerFin", 16, 1 }, { "RcfReasonOut", 12, 4 }, { "TxCchannel", 10, 2 }, { "RcfTxChannel", 8, 2 }, { "RxEchannel", 6, 2 }, { "RcfRxChannel", 5, 1 }, { "RcfDataOutSrdy", 4, 1 }, { "RxDvld", 3, 1 }, { "RxOoDvld", 2, 1 }, { "RxCongestion", 1, 1 }, { "TxCongestion", 0, 1 }, { NULL } }; static const struct field_desc tp_la1[] = { { "CplCmdIn", 56, 8 }, { "CplCmdOut", 48, 8 }, { "ESynOut", 47, 1 }, { "EAckOut", 46, 1 }, { "EFinOut", 45, 1 }, { "ERstOut", 44, 1 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static const struct field_desc tp_la2[] = { { "CplCmdIn", 56, 8 }, { "MpsVfVld", 55, 1 }, { "MpsPf", 52, 3 }, { "MpsVf", 44, 8 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static void tp_la_show(struct sbuf *sb, uint64_t *p, int idx) { field_desc_show(sb, *p, tp_la0); } static void tp_la_show2(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], tp_la0); } static void tp_la_show3(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? 
tp_la2 : tp_la1); } static int sysctl_tp_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint64_t *buf, *p; int rc; u_int i, inc; void (*show_func)(struct sbuf *, uint64_t *, int); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK); t4_tp_read_la(sc, buf, NULL); p = buf; switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) { case 2: inc = 2; show_func = tp_la_show2; break; case 3: inc = 2; show_func = tp_la_show3; break; default: inc = 1; show_func = tp_la_show; } for (i = 0; i < TPLA_SIZE / inc; i++, p += inc) (*show_func)(sb, p, i); rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; u64 nrate[MAX_NCHAN], orate[MAX_NCHAN]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_chan_txrate(sc, nrate, orate); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju %10ju %10ju\n", nrate[0], nrate[1], nrate[2], nrate[3]); sbuf_printf(sb, "Offload B/s: %10ju %10ju %10ju %10ju", orate[0], orate[1], orate[2], orate[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju\n", nrate[0], nrate[1]); sbuf_printf(sb, "Offload B/s: %10ju %10ju", orate[0], orate[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint32_t *buf, *p; int rc, i; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_ulprx_read_la(sc, buf); p = buf; sbuf_printf(sb, " Pcmd Type Message" " Data"); for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) { sbuf_printf(sb, "\n%08x%08x %4x %08x %08x%08x%08x%08x", p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, v; MPASS(chip_id(sc) >= CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); v = t4_read_reg(sc, A_SGE_STAT_CFG); if (G_STATSOURCE_T5(v) == 7) { int mode; mode = is_t5(sc) ? 
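
/*
 * Illustrative aside: sysctl_tp_la above picks both the stride through the
 * capture buffer and the per-record decoder from the logic-analyzer mode.
 * A self-contained mirror of that function-pointer dispatch (a sketch, not
 * driver code):
 */
#include <stdint.h>
#include <stdio.h>

static void
show1(const uint64_t *p, int idx)
{
	printf("%d: %ju\n", idx, (uintmax_t)p[0]);
}

static void
show2(const uint64_t *p, int idx)
{
	printf("%d: %ju %ju\n", idx, (uintmax_t)p[0], (uintmax_t)p[1]);
}

int
main(void)
{
	uint64_t buf[4] = { 1, 2, 3, 4 };
	void (*show_func)(const uint64_t *, int);
	int i, inc, mode = 2;	/* pretend G_DBGLAMODE() returned 2 */

	switch (mode) {
	case 2:
	case 3:
		inc = 2;		/* two words per record */
		show_func = show2;
		break;
	default:
		inc = 1;
		show_func = show1;
	}
	for (i = 0; i < 4 / inc; i++)
		(*show_func)(&buf[i * inc], i);
	return (0);	/* prints "0: 1 2" and "1: 3 4" */
}
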
G_STATMODE(v) : G_T6_STATMODE(v); if (mode == 0) { sbuf_printf(sb, "total %d, incomplete %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else if (mode == 1) { sbuf_printf(sb, "total %d, data overflow %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else { sbuf_printf(sb, "unknown mode %d", mode); } } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_cpus(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; enum cpu_sets op = arg2; cpuset_t cpuset; struct sbuf *sb; int i, rc; MPASS(op == LOCAL_CPUS || op == INTR_CPUS); CPU_ZERO(&cpuset); rc = bus_get_cpus(sc->dev, op, sizeof(cpuset), &cpuset); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); CPU_FOREACH(i) sbuf_printf(sb, "%d ", i); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } #ifdef TCP_OFFLOAD static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int *old_ports, *new_ports; int i, new_count, rc; if (req->newptr == NULL && req->oldptr == NULL) return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) * sizeof(sc->tt.tls_rx_ports[0]))); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx"); if (rc) return (rc); if (sc->tt.num_tls_rx_ports == 0) { i = -1; rc = SYSCTL_OUT(req, &i, sizeof(i)); } else rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports, sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0])); if (rc == 0 && req->newptr != NULL) { new_count = req->newlen / sizeof(new_ports[0]); new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE, M_WAITOK); rc = SYSCTL_IN(req, new_ports, new_count * sizeof(new_ports[0])); if (rc) goto err; /* Allow setting to a single '-1' to clear the list. 
*/ if (new_count == 1 && new_ports[0] == -1) { ADAPTER_LOCK(sc); old_ports = sc->tt.tls_rx_ports; sc->tt.tls_rx_ports = NULL; sc->tt.num_tls_rx_ports = 0; ADAPTER_UNLOCK(sc); free(old_ports, M_CXGBE); } else { for (i = 0; i < new_count; i++) { if (new_ports[i] < 1 || new_ports[i] > IPPORT_MAX) { rc = EINVAL; goto err; } } ADAPTER_LOCK(sc); old_ports = sc->tt.tls_rx_ports; sc->tt.tls_rx_ports = new_ports; sc->tt.num_tls_rx_ports = new_count; ADAPTER_UNLOCK(sc); free(old_ports, M_CXGBE); new_ports = NULL; } err: free(new_ports, M_CXGBE); } end_synchronized_op(sc, 0); return (rc); } static void unit_conv(char *buf, size_t len, u_int val, u_int factor) { u_int rem = val % factor; if (rem == 0) snprintf(buf, len, "%u", val / factor); else { while (rem % 10 == 0) rem /= 10; snprintf(buf, len, "%u.%u", val / factor, rem); } } static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; char buf[16]; u_int res, re; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); switch (arg2) { case 0: /* timer_tick */ re = G_TIMERRESOLUTION(res); break; case 1: /* TCP timestamp tick */ re = G_TIMESTAMPRESOLUTION(res); break; case 2: /* DACK tick */ re = G_DELAYEDACKRESOLUTION(res); break; default: return (EDOOFUS); } unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000); return (sysctl_handle_string(oidp, buf, sizeof(buf), req)); } static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int res, dack_re, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); dack_re = G_DELAYEDACKRESOLUTION(res); v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER); return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; u_int tre; u_long tp_tick_us, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX || reg == A_TP_PERS_MIN || reg == A_TP_PERS_MAX || reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL || reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER); tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION)); tp_tick_us = (cclk_ps << tre) / 1000000; if (reg == A_TP_INIT_SRTT) v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg)); else v = tp_tick_us * t4_read_reg(sc, reg); return (sysctl_handle_long(oidp, &v, 0, req)); } /* * All fields in TP_SHIFT_CNT are 4b and the starting location of the field is * passed to this function. 
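 * For example, idx = 12 selects bits 15:12, i.e. the handler returns
 * (TP_SHIFT_CNT >> 12) & 0xf.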
*/ static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int idx = arg2; u_int v; MPASS(idx >= 0 && idx <= 24); v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf; return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int idx = arg2; u_int shift, v, r; MPASS(idx >= 0 && idx < 16); r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3); shift = (idx & 3) << 3; v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0; return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_ofld_rxq *ofld_rxq; uint8_t v; idx = vi->ofld_tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4otmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1); for_each_ofld_rxq(vi, i, ofld_rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v); #else ofld_rxq->iq.intr_params = v; #endif } vi->ofld_tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = vi->ofld_pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4opktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->ofld_pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } #endif static int get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt) { int rc; if (cntxt->cid > M_CTXTQID) return (EINVAL); if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS && cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt"); if (rc) return (rc); if (sc->flags & FW_OK) { rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); if (rc == 0) goto done; } /* * Read via firmware failed or wasn't even attempted. Read directly via * the backdoor. */ rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); done: end_synchronized_op(sc, 0); return (rc); } static int load_fw(struct adapter *sc, struct t4_data *fw) { int rc; uint8_t *fw_data; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw"); if (rc) return (rc); /* * The firmware, with the sole exception of the memory parity error * handler, runs from memory and not flash. It is almost always safe to * install a new firmware on a running system. Just set bit 1 in * hw.cxgbe.dflags or dev.<nexus>.<instance>.dflags first.
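 * E.g., assuming DF_LOAD_FW_ANYTIME is bit 1 and a hypothetical device
 * instance: sysctl dev.t4nex.0.dflags=2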
*/ if (sc->flags & FULL_INIT_DONE && (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) { rc = EBUSY; goto done; } fw_data = malloc(fw->len, M_CXGBE, M_WAITOK); if (fw_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(fw->data, fw_data, fw->len); if (rc == 0) rc = -t4_load_fw(sc, fw_data, fw->len); free(fw_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_cfg(struct adapter *sc, struct t4_data *cfg) { int rc; uint8_t *cfg_data = NULL; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf"); if (rc) return (rc); if (cfg->len == 0) { /* clear */ rc = -t4_load_cfg(sc, NULL, 0); goto done; } cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK); if (cfg_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(cfg->data, cfg_data, cfg->len); if (rc == 0) rc = -t4_load_cfg(sc, cfg_data, cfg->len); free(cfg_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_boot(struct adapter *sc, struct t4_bootrom *br) { int rc; uint8_t *br_data = NULL; u_int offset; if (br->len > 1024 * 1024) return (EFBIG); if (br->pf_offset == 0) { /* pfidx */ if (br->pfidx_addr > 7) return (EINVAL); offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr, A_PCIE_PF_EXPROM_OFST))); } else if (br->pf_offset == 1) { /* offset */ offset = G_OFFSET(br->pfidx_addr); } else { return (EINVAL); } rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldbr"); if (rc) return (rc); if (br->len == 0) { /* clear */ rc = -t4_load_boot(sc, NULL, offset, 0); goto done; } br_data = malloc(br->len, M_CXGBE, M_WAITOK); if (br_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(br->data, br_data, br->len); if (rc == 0) rc = -t4_load_boot(sc, br_data, offset, br->len); free(br_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_bootcfg(struct adapter *sc, struct t4_data *bc) { int rc; uint8_t *bc_data = NULL; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf"); if (rc) return (rc); if (bc->len == 0) { /* clear */ rc = -t4_load_bootcfg(sc, NULL, 0); goto done; } bc_data = malloc(bc->len, M_CXGBE, M_WAITOK); if (bc_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(bc->data, bc_data, bc->len); if (rc == 0) rc = -t4_load_bootcfg(sc, bc_data, bc->len); free(bc_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump) { int rc; struct cudbg_init *cudbg; void *handle, *buf; /* buf is large, don't block if no memory is available */ buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO); if (buf == NULL) return (ENOMEM); handle = cudbg_alloc_handle(); if (handle == NULL) { rc = ENOMEM; goto done; } cudbg = cudbg_get_init(handle); cudbg->adap = sc; cudbg->print = (cudbg_print_cb)printf; #ifndef notyet device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n", __func__, dump->wr_flash, dump->len, dump->data); #endif if (dump->wr_flash) cudbg->use_flash = 1; MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap)); memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap)); rc = cudbg_collect(handle, buf, &dump->len); if (rc != 0) goto done; rc = copyout(buf, dump->data, dump->len); done: cudbg_free_handle(handle); free(buf, M_CXGBE); return (rc); } static void free_offload_policy(struct t4_offload_policy *op) { struct offload_rule *r; int i; if (op == NULL) return; r = &op->rule[0]; for (i = 0; i < op->nrules; i++, r++) { free(r->bpf_prog.bf_insns, M_CXGBE); } free(op->rule, M_CXGBE); free(op, M_CXGBE); } static int set_offload_policy(struct adapter *sc, 
struct t4_offload_policy *uop) { int i, rc, len; struct t4_offload_policy *op, *old; struct bpf_program *bf; const struct offload_settings *s; struct offload_rule *r; void *u; if (!is_offload(sc)) return (ENODEV); if (uop->nrules == 0) { /* Delete installed policies. */ op = NULL; goto set_policy; } if (uop->nrules > 256) { /* arbitrary */ return (E2BIG); } /* Copy userspace offload policy to kernel */ op = malloc(sizeof(*op), M_CXGBE, M_ZERO | M_WAITOK); op->nrules = uop->nrules; len = op->nrules * sizeof(struct offload_rule); op->rule = malloc(len, M_CXGBE, M_ZERO | M_WAITOK); rc = copyin(uop->rule, op->rule, len); if (rc) { free(op->rule, M_CXGBE); free(op, M_CXGBE); return (rc); } r = &op->rule[0]; for (i = 0; i < op->nrules; i++, r++) { /* Validate open_type */ if (r->open_type != OPEN_TYPE_LISTEN && r->open_type != OPEN_TYPE_ACTIVE && r->open_type != OPEN_TYPE_PASSIVE && r->open_type != OPEN_TYPE_DONTCARE) { rc = EINVAL; /* rc still holds 0 from copyin at this point */ error: /* * Rules 0 to i have malloc'd filters that need to be * freed. Rules i+1 to nrules have userspace pointers * and should be left alone. */ op->nrules = i; free_offload_policy(op); return (rc); } /* Validate settings */ s = &r->settings; if ((s->offload != 0 && s->offload != 1) || s->cong_algo < -1 || s->cong_algo > CONG_ALG_HIGHSPEED || s->sched_class < -1 || s->sched_class >= sc->chip_params->nsched_cls) { rc = EINVAL; goto error; } bf = &r->bpf_prog; u = bf->bf_insns; /* userspace ptr */ bf->bf_insns = NULL; if (bf->bf_len == 0) { /* legal, matches everything */ continue; } len = bf->bf_len * sizeof(*bf->bf_insns); bf->bf_insns = malloc(len, M_CXGBE, M_ZERO | M_WAITOK); rc = copyin(u, bf->bf_insns, len); if (rc != 0) goto error; if (!bpf_validate(bf->bf_insns, bf->bf_len)) { rc = EINVAL; goto error; } } set_policy: rw_wlock(&sc->policy_lock); old = sc->policy; sc->policy = op; rw_wunlock(&sc->policy_lock); free_offload_policy(old); return (0); } #define MAX_READ_BUF_SIZE (128 * 1024) static int read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr) { uint32_t addr, remaining, n; uint32_t *buf; int rc; uint8_t *dst; rc = validate_mem_range(sc, mr->addr, mr->len); if (rc != 0) return (rc); buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK); addr = mr->addr; remaining = mr->len; dst = (void *)mr->data; while (remaining) { n = min(remaining, MAX_READ_BUF_SIZE); read_via_memwin(sc, 2, addr, buf, n); rc = copyout(buf, dst, n); if (rc != 0) break; dst += n; remaining -= n; addr += n; } free(buf, M_CXGBE); return (rc); } #undef MAX_READ_BUF_SIZE static int read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd) { int rc; if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports) return (EINVAL); if (i2cd->len > sizeof(i2cd->data)) return (EFBIG); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr, i2cd->offset, i2cd->len, &i2cd->data[0]); end_synchronized_op(sc, 0); return (rc); } int t4_os_find_pci_capability(struct adapter *sc, int cap) { int i; return (pci_find_cap(sc->dev, cap, &i) == 0 ?
i : 0); } int t4_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t4_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } void t4_os_portmod_changed(struct port_info *pi) { struct adapter *sc = pi->adapter; struct vi_info *vi; struct ifnet *ifp; static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM" }; MPASS((pi->flags & FIXED_IFMEDIA) == 0); vi = &pi->vi[0]; if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) { PORT_LOCK(pi); build_medialist(pi, &pi->media); apply_l1cfg(pi); PORT_UNLOCK(pi); end_synchronized_op(sc, LOCK_HELD); } ifp = vi->ifp; if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) if_printf(ifp, "transceiver unplugged.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) if_printf(ifp, "unknown transceiver inserted.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) if_printf(ifp, "unsupported transceiver inserted.\n"); else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) { if_printf(ifp, "%dGbps %s transceiver inserted.\n", port_top_speed(pi), mod_str[pi->mod_type]); } else { if_printf(ifp, "transceiver (type %d) inserted.\n", pi->mod_type); } } void t4_os_link_changed(struct port_info *pi) { struct vi_info *vi; struct ifnet *ifp; struct link_config *lc; int v; PORT_LOCK_ASSERT_OWNED(pi); for_each_vi(pi, v, vi) { ifp = vi->ifp; if (ifp == NULL) continue; lc = &pi->link_cfg; if (lc->link_ok) { ifp->if_baudrate = IF_Mbps(lc->speed); if_link_state_change(ifp, LINK_STATE_UP); } else { if_link_state_change(ifp, LINK_STATE_DOWN); } } } void t4_iterate(void (*func)(struct adapter *, void *), void *arg) { struct adapter *sc; sx_slock(&t4_list_lock); SLIST_FOREACH(sc, &t4_list, link) { /* * func should not make any assumptions about what state sc is * in - the only guarantee is that sc->sc_lock is a valid lock. 
*/ func(sc, arg); } sx_sunlock(&t4_list_lock); } static int t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int rc; struct adapter *sc = dev->si_drv1; rc = priv_check(td, PRIV_DRIVER); if (rc != 0) return (rc); switch (cmd) { case CHELSIO_T4_GETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) edata->val = t4_read_reg(sc, edata->addr); else if (edata->size == 8) edata->val = t4_read_reg64(sc, edata->addr); else return (EINVAL); break; } case CHELSIO_T4_SETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) { if (edata->val & 0xffffffff00000000) return (EINVAL); t4_write_reg(sc, edata->addr, (uint32_t) edata->val); } else if (edata->size == 8) t4_write_reg64(sc, edata->addr, edata->val); else return (EINVAL); break; } case CHELSIO_T4_REGDUMP: { struct t4_regdump *regs = (struct t4_regdump *)data; int reglen = t4_get_regs_len(sc); uint8_t *buf; if (regs->len < reglen) { regs->len = reglen; /* hint to the caller */ return (ENOBUFS); } regs->len = reglen; buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO); get_regs(sc, regs, buf); rc = copyout(buf, regs->data, reglen); free(buf, M_CXGBE); break; } case CHELSIO_T4_GET_FILTER_MODE: rc = get_filter_mode(sc, (uint32_t *)data); break; case CHELSIO_T4_SET_FILTER_MODE: rc = set_filter_mode(sc, *(uint32_t *)data); break; case CHELSIO_T4_GET_FILTER: rc = get_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_SET_FILTER: rc = set_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_DEL_FILTER: rc = del_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_GET_SGE_CONTEXT: rc = get_sge_context(sc, (struct t4_sge_context *)data); break; case CHELSIO_T4_LOAD_FW: rc = load_fw(sc, (struct t4_data *)data); break; case CHELSIO_T4_GET_MEM: rc = read_card_mem(sc, 2, (struct t4_mem_range *)data); break; case CHELSIO_T4_GET_I2C: rc = read_i2c(sc, (struct t4_i2c_data *)data); break; case CHELSIO_T4_CLEAR_STATS: { int i, v, bg_map; u_int port_id = *(uint32_t *)data; struct port_info *pi; struct vi_info *vi; if (port_id >= sc->params.nports) return (EINVAL); pi = sc->port[port_id]; if (pi == NULL) return (EIO); /* MAC stats */ t4_clr_port_stats(sc, pi->tx_chan); pi->tx_parse_error = 0; pi->tnl_cong_drops = 0; mtx_lock(&sc->reg_lock); for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) t4_clr_vi_stats(sc, vi->viid); } bg_map = pi->mps_bg_map; v = 0; /* reuse */ while (bg_map) { i = ffs(bg_map) - 1; t4_write_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1, A_TP_MIB_TNL_CNG_DROP_0 + i); bg_map &= ~(1 << i); } mtx_unlock(&sc->reg_lock); /* * Since this command accepts a port, clear stats for * all VIs on this port. */
*/ for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) { struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *wrq; for_each_rxq(vi, i, rxq) { #if defined(INET) || defined(INET6) rxq->lro.lro_queued = 0; rxq->lro.lro_flushed = 0; #endif rxq->rxcsum = 0; rxq->vlan_extraction = 0; } for_each_txq(vi, i, txq) { txq->txcsum = 0; txq->tso_wrs = 0; txq->vlan_insertion = 0; txq->imm_wrs = 0; txq->sgl_wrs = 0; txq->txpkt_wrs = 0; txq->txpkts0_wrs = 0; txq->txpkts1_wrs = 0; txq->txpkts0_pkts = 0; txq->txpkts1_pkts = 0; mp_ring_reset_stats(txq->r); } #ifdef TCP_OFFLOAD /* nothing to clear for each ofld_rxq */ for_each_ofld_txq(vi, i, wrq) { wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } #endif if (IS_MAIN_VI(vi)) { wrq = &sc->sge.ctrlq[pi->port_id]; wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } } } break; } case CHELSIO_T4_SCHED_CLASS: rc = t4_set_sched_class(sc, (struct t4_sched_params *)data); break; case CHELSIO_T4_SCHED_QUEUE: rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data); break; case CHELSIO_T4_GET_TRACER: rc = t4_get_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_SET_TRACER: rc = t4_set_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_LOAD_CFG: rc = load_cfg(sc, (struct t4_data *)data); break; case CHELSIO_T4_LOAD_BOOT: rc = load_boot(sc, (struct t4_bootrom *)data); break; case CHELSIO_T4_LOAD_BOOTCFG: rc = load_bootcfg(sc, (struct t4_data *)data); break; case CHELSIO_T4_CUDBG_DUMP: rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data); break; case CHELSIO_T4_SET_OFLD_POLICY: rc = set_offload_policy(sc, (struct t4_offload_policy *)data); break; default: rc = ENOTTY; } return (rc); } void t4_db_full(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } void t4_db_dropped(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *vi, int enable) { int rc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; ASSERT_SYNCHRONIZED_OP(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) { /* TOE is already enabled. */ return (0); } /* * We need the port's queues around so that we're able to send * and receive CPLs to/from the TOE even if the ifnet for this * port has never been UP'd administratively. */ if (!(vi->flags & VI_INIT_DONE)) { rc = vi_full_init(vi); if (rc) return (rc); } if (!(pi->vi[0].flags & VI_INIT_DONE)) { rc = vi_full_init(&pi->vi[0]); if (rc) return (rc); } if (isset(&sc->offload_map, pi->port_id)) { /* TOE is enabled on another VI of this port. */ pi->uld_vis++; return (0); } if (!uld_active(sc, ULD_TOM)) { rc = t4_activate_uld(sc, ULD_TOM); if (rc == EAGAIN) { log(LOG_WARNING, "You must kldload t4_tom.ko before trying " "to enable TOE on a cxgbe interface.\n"); } if (rc != 0) return (rc); KASSERT(sc->tom_softc != NULL, ("%s: TOM activated but softc NULL", __func__)); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM activated but flag not set", __func__)); } /* Activate iWARP and iSCSI too, if the modules are loaded. */ if (!uld_active(sc, ULD_IWARP)) (void) t4_activate_uld(sc, ULD_IWARP); if (!uld_active(sc, ULD_ISCSI)) (void) t4_activate_uld(sc, ULD_ISCSI); pi->uld_vis++; setbit(&sc->offload_map, pi->port_id); } else { pi->uld_vis--; if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0) return (0); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM never initialized?", __func__)); clrbit(&sc->offload_map, pi->port_id); } return (0); } /* * Add an upper layer driver to the global list. 
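 * A caller (e.g. a TOE module such as t4_tom) is expected to pass a
 * uld_info with uld_id, activate, and deactivate filled in; only one
 * registration per uld_id is accepted.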
*/ int t4_register_uld(struct uld_info *ui) { int rc = 0; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u->uld_id == ui->uld_id) { rc = EEXIST; goto done; } } SLIST_INSERT_HEAD(&t4_uld_list, ui, link); ui->refcount = 0; done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_unregister_uld(struct uld_info *ui) { int rc = EINVAL; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u == ui) { if (ui->refcount > 0) { rc = EBUSY; goto done; } SLIST_REMOVE(&t4_uld_list, ui, uld_info, link); rc = 0; goto done; } } done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_activate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = EAGAIN; /* kldload the module with this ULD and try again. */ sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { if (!(sc->flags & FULL_INIT_DONE)) { rc = adapter_full_init(sc); if (rc != 0) break; } rc = ui->activate(sc); if (rc == 0) { setbit(&sc->active_ulds, id); ui->refcount++; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int t4_deactivate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = ENXIO; sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { rc = ui->deactivate(sc); if (rc == 0) { clrbit(&sc->active_ulds, id); ui->refcount--; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int uld_active(struct adapter *sc, int uld_id) { MPASS(uld_id >= 0 && uld_id <= ULD_MAX); return (isset(&sc->active_ulds, uld_id)); } #endif /* * t = ptr to tunable. * nc = number of CPUs. * c = compiled in default for that tunable. */ static void calculate_nqueues(int *t, int nc, const int c) { int nq; if (*t > 0) return; nq = *t < 0 ? -*t : c; *t = min(nc, nq); } /* * Come up with reasonable defaults for some of the tunables, provided they're * not set by the user (in which case we'll use the values as is). */
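/*
 * E.g. (hypothetical loader.conf overrides): hw.cxgbe.ntxq="8" or
 * hw.cxgbe.qsize_txq="2048" would be used as-is instead of the defaults
 * computed below.
 */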
*/ static void tweak_tunables(void) { int nc = mp_ncpus; /* our snapshot of the number of CPUs */ if (t4_ntxq < 1) { #ifdef RSS t4_ntxq = rss_getnumbuckets(); #else calculate_nqueues(&t4_ntxq, nc, NTXQ); #endif } calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI); if (t4_nrxq < 1) { #ifdef RSS t4_nrxq = rss_getnumbuckets(); #else calculate_nqueues(&t4_nrxq, nc, NRXQ); #endif } calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI); #if defined(TCP_OFFLOAD) || defined(RATELIMIT) calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ); calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI); #endif #ifdef TCP_OFFLOAD calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ); calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI); if (t4_toecaps_allowed == -1) t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; if (t4_rdmacaps_allowed == -1) { t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP | FW_CAPS_CONFIG_RDMA_RDMAC; } if (t4_iscsicaps_allowed == -1) { t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU | FW_CAPS_CONFIG_ISCSI_TARGET_PDU | FW_CAPS_CONFIG_ISCSI_T10DIF; } if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS) t4_tmr_idx_ofld = TMR_IDX_OFLD; if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS) t4_pktc_idx_ofld = PKTC_IDX_OFLD; #else if (t4_toecaps_allowed == -1) t4_toecaps_allowed = 0; if (t4_rdmacaps_allowed == -1) t4_rdmacaps_allowed = 0; if (t4_iscsicaps_allowed == -1) t4_iscsicaps_allowed = 0; #endif #ifdef DEV_NETMAP calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI); calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI); #endif if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS) t4_tmr_idx = TMR_IDX; if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS) t4_pktc_idx = PKTC_IDX; if (t4_qsize_txq < 128) t4_qsize_txq = 128; if (t4_qsize_rxq < 128) t4_qsize_rxq = 128; while (t4_qsize_rxq & 7) t4_qsize_rxq++; t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX; /* * Number of VIs to create per-port. The first VI is the "main" regular * VI for the port. The rest are additional virtual interfaces on the * same physical port. Note that the main VI does not have native * netmap support but the extra VIs do. * * Limit the number of VIs per port to the number of available * MAC addresses per port. 
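 * For example, setting hw.cxgbe.num_vis=2 (hypothetical value) requests one
 * extra VI per port, clipped below to nitems(vi_mac_funcs).
 */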
*/ if (t4_num_vis < 1) t4_num_vis = 1; if (t4_num_vis > nitems(vi_mac_funcs)) { t4_num_vis = nitems(vi_mac_funcs); printf("cxgbe: number of VIs limited to %d\n", t4_num_vis); } if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) { pcie_relaxed_ordering = 1; #if defined(__i386__) || defined(__amd64__) if (cpu_vendor_id == CPU_VENDOR_INTEL) pcie_relaxed_ordering = 0; #endif } } #ifdef DDB static void t4_dump_tcb(struct adapter *sc, int tid) { uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos; reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2); save = t4_read_reg(sc, reg); base = sc->memwin[2].mw_base; /* Dump TCB for the tid */ tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE); tcb_addr += tid * TCB_SIZE; if (is_t4(sc)) { pf = 0; win_pos = tcb_addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); win_pos = tcb_addr & ~0x7f; /* start must be 128B aligned */ } t4_write_reg(sc, reg, win_pos | pf); t4_read_reg(sc, reg); off = tcb_addr - win_pos; for (i = 0; i < 4; i++) { uint32_t buf[8]; for (j = 0; j < 8; j++, off += 4) buf[j] = htonl(t4_read_reg(sc, base + off)); db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); } t4_write_reg(sc, reg, save); t4_read_reg(sc, reg); } static void t4_dump_devlog(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e e; int i, first, j, m, nentries, rc; uint64_t ftstamp = UINT64_MAX; if (dparams->start == 0) { db_printf("devlog params not valid\n"); return; } nentries = dparams->size / sizeof(struct fw_devlog_e); m = fwmtype_to_hwmtype(dparams->memtype); /* Find the first entry. */ first = -1; for (i = 0; i < nentries && !db_pager_quit; i++) { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) break; if (e.timestamp == 0) break; e.timestamp = be64toh(e.timestamp); if (e.timestamp < ftstamp) { ftstamp = e.timestamp; first = i; } } if (first == -1) return; i = first; do { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) return; if (e.timestamp == 0) return; e.timestamp = be64toh(e.timestamp); e.seqno = be32toh(e.seqno); for (j = 0; j < 8; j++) e.params[j] = be32toh(e.params[j]); db_printf("%10d %15ju %8s %8s ", e.seqno, e.timestamp, (e.level < nitems(devlog_level_strings) ? devlog_level_strings[e.level] : "UNKNOWN"), (e.facility < nitems(devlog_facility_strings) ? 
devlog_facility_strings[e.facility] : "UNKNOWN")); db_printf(e.fmt, e.params[0], e.params[1], e.params[2], e.params[3], e.params[4], e.params[5], e.params[6], e.params[7]); if (++i == nentries) i = 0; } while (i != first && !db_pager_quit); } static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table); _DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table); DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL) { device_t dev; int t; bool valid; valid = false; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); valid = true; } db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 devlog <nexus>\n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } t4_dump_devlog(device_get_softc(dev)); } DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL) { device_t dev; int radix, tid, t; bool valid; valid = false; radix = db_radix; db_radix = 10; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); t = db_read_token(); if (t == tNUMBER) { tid = db_tok_number; valid = true; } } db_radix = radix; db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 tcb <nexus> <tid>\n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } if (tid < 0) { db_printf("invalid tid\n"); return; } t4_dump_tcb(device_get_softc(dev), tid); } #endif /* * Borrowed from cesa_prep_aes_key(). * * NB: The crypto engine wants the words in the decryption key in reverse * order. */ void t4_aes_getdeckey(void *dec_key, const void *enc_key, unsigned int kbits) { uint32_t ek[4 * (RIJNDAEL_MAXNR + 1)]; uint32_t *dkey; int i; rijndaelKeySetupEnc(ek, enc_key, kbits); dkey = dec_key; dkey += (kbits / 8) / 4; switch (kbits) { case 128: for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 10 + i]); break; case 192: for (i = 0; i < 2; i++) *--dkey = htobe32(ek[4 * 11 + 2 + i]); for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 12 + i]); break; case 256: for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 13 + i]); for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 14 + i]); break; } MPASS(dkey == dec_key); } static struct sx mlu; /* mod load unload */ SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload"); static int mod_event(module_t mod, int cmd, void *arg) { int rc = 0; static int loaded = 0; switch (cmd) { case MOD_LOAD: sx_xlock(&mlu); if (loaded++ == 0) { t4_sge_modload(); t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, t4_filter_rpl, CPL_COOKIE_FILTER); t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl, CPL_COOKIE_FILTER); t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, t4_hashfilter_ao_rpl, CPL_COOKIE_HASHFILTER); t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, t4_hashfilter_tcb_rpl, CPL_COOKIE_HASHFILTER); t4_register_shared_cpl_handler(CPL_ABORT_RPL_RSS, t4_del_hashfilter_rpl, CPL_COOKIE_HASHFILTER); t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt); t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt); t4_register_cpl_handler(CPL_SMT_WRITE_RPL, do_smt_write_rpl); sx_init(&t4_list_lock, "T4/T5 adapters"); SLIST_INIT(&t4_list); #ifdef TCP_OFFLOAD sx_init(&t4_uld_list_lock, "T4/T5 ULDs"); SLIST_INIT(&t4_uld_list); #endif t4_tracer_modload(); tweak_tunables(); } sx_xunlock(&mlu); break; case MOD_UNLOAD: sx_xlock(&mlu); if (--loaded == 0) { int tries; sx_slock(&t4_list_lock); if (!SLIST_EMPTY(&t4_list)) { rc = EBUSY; sx_sunlock(&t4_list_lock); goto done_unload; } #ifdef TCP_OFFLOAD sx_slock(&t4_uld_list_lock); if (!SLIST_EMPTY(&t4_uld_list)) { rc = EBUSY; sx_sunlock(&t4_uld_list_lock); sx_sunlock(&t4_list_lock);
goto done_unload; } #endif tries = 0; while (tries++ < 5 && t4_sge_extfree_refs() != 0) { uprintf("%ju clusters with custom free routine " "still in use.\n", t4_sge_extfree_refs()); pause("t4unload", 2 * hz); } #ifdef TCP_OFFLOAD sx_sunlock(&t4_uld_list_lock); #endif sx_sunlock(&t4_list_lock); if (t4_sge_extfree_refs() == 0) { t4_tracer_modunload(); #ifdef TCP_OFFLOAD sx_destroy(&t4_uld_list_lock); #endif sx_destroy(&t4_list_lock); t4_sge_modunload(); loaded = 0; } else { rc = EBUSY; loaded++; /* undo earlier decrement */ } } done_unload: sx_xunlock(&mlu); break; } return (rc); } static devclass_t t4_devclass, t5_devclass, t6_devclass; static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass; static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0); MODULE_VERSION(t4nex, 1); MODULE_DEPEND(t4nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t4nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0); MODULE_VERSION(t5nex, 1); MODULE_DEPEND(t5nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t5nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0); MODULE_VERSION(t6nex, 1); MODULE_DEPEND(t6nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t6nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0); MODULE_VERSION(cxgbe, 1); DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0); MODULE_VERSION(cxl, 1); DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0); MODULE_VERSION(cc, 1); DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0); MODULE_VERSION(vcxgbe, 1); DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0); MODULE_VERSION(vcxl, 1); DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0); MODULE_VERSION(vcc, 1); Index: projects/clang700-import/sys/dev/cxgbe/t4_netmap.c =================================================================== --- projects/clang700-import/sys/dev/cxgbe/t4_netmap.c (revision 337615) +++ projects/clang700-import/sys/dev/cxgbe/t4_netmap.c (revision 337616) @@ -1,1073 +1,1068 @@ /*- * Copyright (c) 2014 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #ifdef DEV_NETMAP #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" extern int fl_pad; /* XXXNM */ SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD, 0, "cxgbe netmap parameters"); /* * 0 = normal netmap rx * 1 = black hole * 2 = supermassive black hole (buffer packing enabled) */ int black_hole = 0; SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_black_hole, CTLFLAG_RDTUN, &black_hole, 0, "Sink incoming packets."); int rx_ndesc = 256; SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_ndesc, CTLFLAG_RWTUN, &rx_ndesc, 0, "# of rx descriptors after which the hw cidx is updated."); int rx_nframes = 64; SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_nframes, CTLFLAG_RWTUN, &rx_nframes, 0, "max # of frames received before waking up netmap rx."); int holdoff_tmr_idx = 2; SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_holdoff_tmr_idx, CTLFLAG_RWTUN, &holdoff_tmr_idx, 0, "Holdoff timer index for netmap rx queues."); /* * Congestion drops. * -1: no congestion feedback (not recommended). * 0: backpressure the channel instead of dropping packets right away. * 1: no backpressure, drop packets for the congested queue immediately. */ static int nm_cong_drop = 1; TUNABLE_INT("hw.cxgbe.nm_cong_drop", &nm_cong_drop); int starve_fl = 0; SYSCTL_INT(_hw_cxgbe, OID_AUTO, starve_fl, CTLFLAG_RWTUN, &starve_fl, 0, "Don't ring fl db for netmap rx queues."); /* * Try to process tx credits in bulk. This may cause a delay in the return of * tx credits and is suitable for bursty or non-stop tx only. 
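 * For example, with the default of 1 a burst's final work request does not
 * request an immediate credit flush; setting the sysctl below to 0 makes
 * cxgbe_nm_tx() set EQUEQ/EQUIQ on the last WR of every burst.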
*/ int lazy_tx_credit_flush = 1; SYSCTL_INT(_hw_cxgbe, OID_AUTO, lazy_tx_credit_flush, CTLFLAG_RWTUN, &lazy_tx_credit_flush, 0, "lazy credit flush for netmap tx queues."); static int alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong) { int rc, cntxt_id, i; __be32 v; struct adapter *sc = vi->pi->adapter; struct sge_params *sp = &sc->params.sge; struct netmap_adapter *na = NA(vi->ifp); struct fw_iq_cmd c; MPASS(na != NULL); MPASS(nm_rxq->iq_desc != NULL); MPASS(nm_rxq->fl_desc != NULL); bzero(nm_rxq->iq_desc, vi->qsize_rxq * IQ_ESIZE); bzero(nm_rxq->fl_desc, na->num_rx_desc * EQ_ESIZE + sp->spg_len); bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | V_FW_IQ_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | FW_LEN16(c)); MPASS(!forwarding_intr_to_fwq(sc)); KASSERT(nm_rxq->intr_idx < sc->intr_count, ("%s: invalid direct intr_idx %d", __func__, nm_rxq->intr_idx)); v = V_FW_IQ_CMD_IQANDSTINDEX(nm_rxq->intr_idx); c.type_to_iqandstindex = htobe32(v | V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | V_FW_IQ_CMD_VIID(vi->viid) | V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(vi->pi->tx_chan) | F_FW_IQ_CMD_IQGTSMODE | V_FW_IQ_CMD_IQINTCNTTHRESH(0) | V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); c.iqsize = htobe16(vi->qsize_rxq); c.iqaddr = htobe64(nm_rxq->iq_ba); if (cong >= 0) { c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN | V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF | F_FW_IQ_CMD_FL0CONGEN); } c.iqns_to_fl0congen |= htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | (black_hole == 2 ? F_FW_IQ_CMD_FL0PACKEN : 0)); c.fl0dcaen_to_fl0cidxfthresh = htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B) | V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? 
X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); c.fl0size = htobe16(na->num_rx_desc / 8 + sp->spg_len / EQ_ESIZE); c.fl0addr = htobe64(nm_rxq->fl_ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(sc->dev, "failed to create netmap ingress queue: %d\n", rc); return (rc); } nm_rxq->iq_cidx = 0; MPASS(nm_rxq->iq_sidx == vi->qsize_rxq - sp->spg_len / IQ_ESIZE); nm_rxq->iq_gen = F_RSPD_GEN; nm_rxq->iq_cntxt_id = be16toh(c.iqid); nm_rxq->iq_abs_id = be16toh(c.physiqid); cntxt_id = nm_rxq->iq_cntxt_id - sc->sge.iq_start; if (cntxt_id >= sc->sge.niq) { panic("%s: nm_rxq->iq_cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.niq - 1); } sc->sge.iqmap[cntxt_id] = (void *)nm_rxq; nm_rxq->fl_cntxt_id = be16toh(c.fl0id); nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; MPASS(nm_rxq->fl_sidx == na->num_rx_desc); cntxt_id = nm_rxq->fl_cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) { panic("%s: nm_rxq->fl_cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); } sc->sge.eqmap[cntxt_id] = (void *)nm_rxq; nm_rxq->fl_db_val = V_QID(nm_rxq->fl_cntxt_id) | sc->chip_params->sge_fl_db; if (chip_id(sc) >= CHELSIO_T5 && cong >= 0) { uint32_t param, val; param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | V_FW_PARAMS_PARAM_YZ(nm_rxq->iq_cntxt_id); if (cong == 0) val = 1 << 19; else { val = 2 << 19; for (i = 0; i < 4; i++) { if (cong & (1 << i)) val |= 1 << (i << 2); } } rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); if (rc != 0) { /* report error but carry on */ device_printf(sc->dev, "failed to set congestion manager context for " "ingress queue %d: %d\n", nm_rxq->iq_cntxt_id, rc); } } t4_write_reg(sc, sc->sge_gts_reg, V_INGRESSQID(nm_rxq->iq_cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx))); return (rc); } static int free_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq) { struct adapter *sc = vi->pi->adapter; int rc; rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, 0xffff); if (rc != 0) device_printf(sc->dev, "%s: failed for iq %d, fl %d: %d\n", __func__, nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, rc); nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; return (rc); } static int alloc_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq) { int rc, cntxt_id; size_t len; struct adapter *sc = vi->pi->adapter; struct netmap_adapter *na = NA(vi->ifp); struct fw_eq_eth_cmd c; MPASS(na != NULL); MPASS(nm_txq->desc != NULL); len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len; bzero(nm_txq->desc, len); bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) | V_FW_EQ_ETH_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE | F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid)); c.fetchszm_to_iqid = htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | V_FW_EQ_ETH_CMD_PCIECHN(vi->pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | V_FW_EQ_ETH_CMD_IQID(sc->sge.nm_rxq[nm_txq->iqidx].iq_cntxt_id)); c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) | V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) | V_FW_EQ_ETH_CMD_EQSIZE(len / EQ_ESIZE)); c.eqaddr =
htobe64(nm_txq->ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(vi->dev, "failed to create netmap egress queue: %d\n", rc); return (rc); } nm_txq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd)); cntxt_id = nm_txq->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) panic("%s: nm_txq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); sc->sge.eqmap[cntxt_id] = (void *)nm_txq; nm_txq->pidx = nm_txq->cidx = 0; MPASS(nm_txq->sidx == na->num_tx_desc); nm_txq->equiqidx = nm_txq->equeqidx = nm_txq->dbidx = 0; nm_txq->doorbells = sc->doorbells; if (isset(&nm_txq->doorbells, DOORBELL_UDB) || isset(&nm_txq->doorbells, DOORBELL_UDBWC) || isset(&nm_txq->doorbells, DOORBELL_WCWR)) { uint32_t s_qpp = sc->params.sge.eq_s_qpp; uint32_t mask = (1 << s_qpp) - 1; volatile uint8_t *udb; udb = sc->udbs_base + UDBS_DB_OFFSET; udb += (nm_txq->cntxt_id >> s_qpp) << PAGE_SHIFT; nm_txq->udb_qid = nm_txq->cntxt_id & mask; if (nm_txq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) clrbit(&nm_txq->doorbells, DOORBELL_WCWR); else { udb += nm_txq->udb_qid << UDBS_SEG_SHIFT; nm_txq->udb_qid = 0; } nm_txq->udb = (volatile void *)udb; } return (rc); } static int free_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq) { struct adapter *sc = vi->pi->adapter; int rc; rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, nm_txq->cntxt_id); if (rc != 0) device_printf(sc->dev, "%s: failed for eq %d: %d\n", __func__, nm_txq->cntxt_id, rc); nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID; return (rc); } static int cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, struct netmap_adapter *na) { struct netmap_slot *slot; struct netmap_kring *kring; struct sge_nm_rxq *nm_rxq; struct sge_nm_txq *nm_txq; int rc, i, j, hwidx; struct hw_buf_info *hwb; ASSERT_SYNCHRONIZED_OP(sc); if ((vi->flags & VI_INIT_DONE) == 0 || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (EAGAIN); hwb = &sc->sge.hw_buf_info[0]; for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) { if (hwb->size == NETMAP_BUF_SIZE(na)) break; } if (i >= SGE_FLBUF_SIZES) { if_printf(ifp, "no hwidx for netmap buffer size %d.\n", NETMAP_BUF_SIZE(na)); return (ENXIO); } hwidx = i; /* Must set caps before calling netmap_reset */ nm_set_native_flags(na); for_each_nm_rxq(vi, i, nm_rxq) { - struct irq *irq = &sc->irq[vi->first_intr + i]; - kring = na->rx_rings[nm_rxq->nid]; if (!nm_kring_pending_on(kring) || nm_rxq->iq_cntxt_id != INVALID_NM_RXQ_CNTXT_ID) continue; alloc_nm_rxq_hwq(vi, nm_rxq, tnl_cong(vi->pi, nm_cong_drop)); nm_rxq->fl_hwidx = hwidx; slot = netmap_reset(na, NR_RX, i, 0); MPASS(slot != NULL); /* XXXNM: error check, not assert */ /* We deal with 8 bufs at a time */ MPASS((na->num_rx_desc & 7) == 0); MPASS(na->num_rx_desc == nm_rxq->fl_sidx); for (j = 0; j < nm_rxq->fl_sidx; j++) { uint64_t ba; PNMB(na, &slot[j], &ba); MPASS(ba != 0); nm_rxq->fl_desc[j] = htobe64(ba | hwidx); } j = nm_rxq->fl_pidx = nm_rxq->fl_sidx - 8; MPASS((j & 7) == 0); j /= 8; /* driver pidx to hardware pidx */ wmb(); t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(j)); - atomic_cmpset_int(&irq->nm_state, NM_OFF, NM_ON); + atomic_cmpset_int(&nm_rxq->nm_state, NM_OFF, NM_ON); } for_each_nm_txq(vi, i, nm_txq) { kring = na->tx_rings[nm_txq->nid]; if (!nm_kring_pending_on(kring) || nm_txq->cntxt_id != INVALID_NM_TXQ_CNTXT_ID) continue; alloc_nm_txq_hwq(vi, nm_txq); slot = netmap_reset(na, NR_TX, i, 0); MPASS(slot != NULL); /* XXXNM: error check, not assert */ } if (vi->nm_rss == NULL) { vi->nm_rss = 
malloc(vi->rss_size * sizeof(uint16_t), M_CXGBE, M_ZERO | M_WAITOK); } for (i = 0; i < vi->rss_size;) { for_each_nm_rxq(vi, j, nm_rxq) { vi->nm_rss[i++] = nm_rxq->iq_abs_id; if (i == vi->rss_size) break; } } rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, vi->nm_rss, vi->rss_size); if (rc != 0) if_printf(ifp, "netmap rss_config failed: %d\n", rc); return (rc); } static int cxgbe_netmap_off(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, struct netmap_adapter *na) { struct netmap_kring *kring; int rc, i; struct sge_nm_txq *nm_txq; struct sge_nm_rxq *nm_rxq; ASSERT_SYNCHRONIZED_OP(sc); if ((vi->flags & VI_INIT_DONE) == 0) return (0); rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, vi->rss, vi->rss_size); if (rc != 0) if_printf(ifp, "failed to restore RSS config: %d\n", rc); nm_clear_native_flags(na); for_each_nm_txq(vi, i, nm_txq) { struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx]; kring = na->tx_rings[nm_txq->nid]; if (!nm_kring_pending_off(kring) || nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID) continue; /* Wait for hw pidx to catch up ... */ while (be16toh(nm_txq->pidx) != spg->pidx) pause("nmpidx", 1); /* ... and then for the cidx. */ while (spg->pidx != spg->cidx) pause("nmcidx", 1); free_nm_txq_hwq(vi, nm_txq); } for_each_nm_rxq(vi, i, nm_rxq) { - struct irq *irq = &sc->irq[vi->first_intr + i]; - kring = na->rx_rings[nm_rxq->nid]; if (!nm_kring_pending_off(kring) || nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID) continue; - while (!atomic_cmpset_int(&irq->nm_state, NM_ON, NM_OFF)) + while (!atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_OFF)) pause("nmst", 1); free_nm_rxq_hwq(vi, nm_rxq); } return (rc); } static int cxgbe_netmap_reg(struct netmap_adapter *na, int on) { struct ifnet *ifp = na->ifp; struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->pi->adapter; int rc; rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4nmreg"); if (rc != 0) return (rc); if (on) rc = cxgbe_netmap_on(sc, vi, ifp, na); else rc = cxgbe_netmap_off(sc, vi, ifp, na); end_synchronized_op(sc, 0); return (rc); } /* How many packets can a single type1 WR carry in n descriptors */ static inline int ndesc_to_npkt(const int n) { MPASS(n > 0 && n <= SGE_MAX_WR_NDESC); return (n * 2 - 1); } #define MAX_NPKT_IN_TYPE1_WR (ndesc_to_npkt(SGE_MAX_WR_NDESC)) /* Space (in descriptors) needed for a type1 WR that carries n packets */ static inline int npkt_to_ndesc(const int n) { MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR); return ((n + 2) / 2); } /* Space (in 16B units) needed for a type1 WR that carries n packets */ static inline int npkt_to_len16(const int n) { MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR); return (n * 2 + 1); } #define NMIDXDIFF(q, idx) IDXDIFF((q)->pidx, (q)->idx, (q)->sidx) static void ring_nm_txq_db(struct adapter *sc, struct sge_nm_txq *nm_txq) { int n; u_int db = nm_txq->doorbells; MPASS(nm_txq->pidx != nm_txq->dbidx); n = NMIDXDIFF(nm_txq, dbidx); if (n > 1) clrbit(&db, DOORBELL_WCWR); wmb(); switch (ffs(db) - 1) { case DOORBELL_UDB: *nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n)); break; case DOORBELL_WCWR: { volatile uint64_t *dst, *src; /* * Queues whose 128B doorbell segment fits in the page do not * use relative qid (udb_qid is always 0). Only queues with * doorbell segments can do WCWR. 
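 * (Here a WCWR is one 64B descriptor copied as eight 8B stores into the
 * write-combined doorbell window; see the copy loop below.)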
*/ KASSERT(nm_txq->udb_qid == 0 && n == 1, ("%s: inappropriate doorbell (0x%x, %d, %d) for nm_txq %p", __func__, nm_txq->doorbells, n, nm_txq->pidx, nm_txq)); dst = (volatile void *)((uintptr_t)nm_txq->udb + UDBS_WR_OFFSET - UDBS_DB_OFFSET); src = (void *)&nm_txq->desc[nm_txq->dbidx]; while (src != (void *)&nm_txq->desc[nm_txq->dbidx + 1]) *dst++ = *src++; wmb(); break; } case DOORBELL_UDBWC: *nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n)); wmb(); break; case DOORBELL_KDB: t4_write_reg(sc, sc->sge_kdoorbell_reg, V_QID(nm_txq->cntxt_id) | V_PIDX(n)); break; } nm_txq->dbidx = nm_txq->pidx; } /* * Write work requests to send 'npkt' frames and ring the doorbell to send them * on their way. No need to check for wraparound. */ static void cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq, struct netmap_kring *kring, int npkt, int npkt_remaining, int txcsum) { struct netmap_ring *ring = kring->ring; struct netmap_slot *slot; const u_int lim = kring->nkr_num_slots - 1; struct fw_eth_tx_pkts_wr *wr = (void *)&nm_txq->desc[nm_txq->pidx]; uint16_t len; uint64_t ba; struct cpl_tx_pkt_core *cpl; struct ulptx_sgl *usgl; int i, n; while (npkt) { n = min(npkt, MAX_NPKT_IN_TYPE1_WR); len = 0; wr = (void *)&nm_txq->desc[nm_txq->pidx]; wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)); wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(npkt_to_len16(n))); wr->npkt = n; wr->r3 = 0; wr->type = 1; cpl = (void *)(wr + 1); for (i = 0; i < n; i++) { slot = &ring->slot[kring->nr_hwcur]; PNMB(kring->na, slot, &ba); MPASS(ba != 0); cpl->ctrl0 = nm_txq->cpl_ctrl0; cpl->pack = 0; cpl->len = htobe16(slot->len); /* * netmap(4) says "netmap does not use features such as * checksum offloading, TCP segmentation offloading, * encryption, VLAN encapsulation/decapsulation, etc." * * So the ncxl interfaces have tx hardware checksumming * disabled by default. But you can override netmap by * enabling IFCAP_TXCSUM on the interface manually. */ cpl->ctrl1 = txcsum ? 0 : htobe64(F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS); usgl = (void *)(cpl + 1); usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | V_ULPTX_NSGE(1)); usgl->len0 = htobe32(slot->len); usgl->addr0 = htobe64(ba); slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); cpl = (void *)(usgl + 1); MPASS(slot->len + len <= UINT16_MAX); len += slot->len; kring->nr_hwcur = nm_next(kring->nr_hwcur, lim); } wr->plen = htobe16(len); npkt -= n; nm_txq->pidx += npkt_to_ndesc(n); MPASS(nm_txq->pidx <= nm_txq->sidx); if (__predict_false(nm_txq->pidx == nm_txq->sidx)) { /* * This routine doesn't know how to write WRs that wrap * around. Make sure it wasn't asked to. */ MPASS(npkt == 0); nm_txq->pidx = 0; } if (npkt == 0 && npkt_remaining == 0) { /* All done. */ if (lazy_tx_credit_flush == 0) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); nm_txq->equeqidx = nm_txq->pidx; nm_txq->equiqidx = nm_txq->pidx; } ring_nm_txq_db(sc, nm_txq); return; } if (NMIDXDIFF(nm_txq, equiqidx) >= nm_txq->sidx / 2) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ | F_FW_WR_EQUIQ); nm_txq->equeqidx = nm_txq->pidx; nm_txq->equiqidx = nm_txq->pidx; } else if (NMIDXDIFF(nm_txq, equeqidx) >= 64) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); nm_txq->equeqidx = nm_txq->pidx; } if (NMIDXDIFF(nm_txq, dbidx) >= 2 * SGE_MAX_WR_NDESC) ring_nm_txq_db(sc, nm_txq); } /* Will get called again.
*/ MPASS(npkt_remaining); } /* How many contiguous free descriptors starting at pidx */ static inline int contiguous_ndesc_available(struct sge_nm_txq *nm_txq) { if (nm_txq->cidx > nm_txq->pidx) return (nm_txq->cidx - nm_txq->pidx - 1); else if (nm_txq->cidx > 0) return (nm_txq->sidx - nm_txq->pidx); else return (nm_txq->sidx - nm_txq->pidx - 1); } static int reclaim_nm_tx_desc(struct sge_nm_txq *nm_txq) { struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx]; uint16_t hw_cidx = spg->cidx; /* snapshot */ struct fw_eth_tx_pkts_wr *wr; int n = 0; hw_cidx = be16toh(hw_cidx); while (nm_txq->cidx != hw_cidx) { wr = (void *)&nm_txq->desc[nm_txq->cidx]; MPASS(wr->op_pkd == htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR))); MPASS(wr->type == 1); MPASS(wr->npkt > 0 && wr->npkt <= MAX_NPKT_IN_TYPE1_WR); n += wr->npkt; nm_txq->cidx += npkt_to_ndesc(wr->npkt); /* * We never sent a WR that wrapped around so the credits coming * back, WR by WR, should never cause the cidx to wrap around * either. */ MPASS(nm_txq->cidx <= nm_txq->sidx); if (__predict_false(nm_txq->cidx == nm_txq->sidx)) nm_txq->cidx = 0; } return (n); } static int cxgbe_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->pi->adapter; struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[vi->first_nm_txq + kring->ring_id]; const u_int head = kring->rhead; u_int reclaimed = 0; int n, d, npkt_remaining, ndesc_remaining, txcsum; /* * Tx was at kring->nr_hwcur last time around and now we need to advance * to kring->rhead. Note that the driver's pidx moves independent of * netmap's kring->nr_hwcur (pidx counts descriptors and the relation * between descriptors and frames isn't 1:1). */ npkt_remaining = head >= kring->nr_hwcur ? head - kring->nr_hwcur : kring->nkr_num_slots - kring->nr_hwcur + head; txcsum = ifp->if_capenable & (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6); while (npkt_remaining) { reclaimed += reclaim_nm_tx_desc(nm_txq); ndesc_remaining = contiguous_ndesc_available(nm_txq); /* Can't run out of descriptors with packets still remaining */ MPASS(ndesc_remaining > 0); /* # of desc needed to tx all remaining packets */ d = (npkt_remaining / MAX_NPKT_IN_TYPE1_WR) * SGE_MAX_WR_NDESC; if (npkt_remaining % MAX_NPKT_IN_TYPE1_WR) d += npkt_to_ndesc(npkt_remaining % MAX_NPKT_IN_TYPE1_WR); if (d <= ndesc_remaining) n = npkt_remaining; else { /* Can't send all, calculate how many can be sent */ n = (ndesc_remaining / SGE_MAX_WR_NDESC) * MAX_NPKT_IN_TYPE1_WR; if (ndesc_remaining % SGE_MAX_WR_NDESC) n += ndesc_to_npkt(ndesc_remaining % SGE_MAX_WR_NDESC); } /* Send n packets and update nm_txq->pidx and kring->nr_hwcur */ npkt_remaining -= n; cxgbe_nm_tx(sc, nm_txq, kring, n, npkt_remaining, txcsum); } MPASS(npkt_remaining == 0); MPASS(kring->nr_hwcur == head); MPASS(nm_txq->dbidx == nm_txq->pidx); /* * Second part: reclaim buffers for completed transmissions. 
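 * For example, if a total of 7 packets' worth of credits have come back,
 * nr_hwtail advances by 7, wrapping modulo nkr_num_slots.
 */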
*/ if (reclaimed || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) { reclaimed += reclaim_nm_tx_desc(nm_txq); kring->nr_hwtail += reclaimed; if (kring->nr_hwtail >= kring->nkr_num_slots) kring->nr_hwtail -= kring->nkr_num_slots; } return (0); } static int cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; struct ifnet *ifp = na->ifp; struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->pi->adapter; struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[vi->first_nm_rxq + kring->ring_id]; u_int const head = kring->rhead; u_int n; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; if (black_hole) return (0); /* No updates ever. */ if (netmap_no_pendintr || force_update) { kring->nr_hwtail = atomic_load_acq_32(&nm_rxq->fl_cidx); kring->nr_kflags &= ~NKR_PENDINTR; } if (nm_rxq->fl_db_saved > 0 && starve_fl == 0) { wmb(); t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(nm_rxq->fl_db_saved)); nm_rxq->fl_db_saved = 0; } /* Userspace done with buffers from kring->nr_hwcur to head */ n = head >= kring->nr_hwcur ? head - kring->nr_hwcur : kring->nkr_num_slots - kring->nr_hwcur + head; n &= ~7U; if (n > 0) { u_int fl_pidx = nm_rxq->fl_pidx; struct netmap_slot *slot = &ring->slot[fl_pidx]; uint64_t ba; int i, dbinc = 0, hwidx = nm_rxq->fl_hwidx; /* * We always deal with 8 buffers at a time. We must have * stopped at an 8B boundary (fl_pidx) last time around and we * must have a multiple of 8B buffers to give to the freelist. */ MPASS((fl_pidx & 7) == 0); MPASS((n & 7) == 0); IDXINCR(kring->nr_hwcur, n, kring->nkr_num_slots); IDXINCR(nm_rxq->fl_pidx, n, nm_rxq->fl_sidx); while (n > 0) { for (i = 0; i < 8; i++, fl_pidx++, slot++) { PNMB(na, slot, &ba); MPASS(ba != 0); nm_rxq->fl_desc[fl_pidx] = htobe64(ba | hwidx); slot->flags &= ~NS_BUF_CHANGED; MPASS(fl_pidx <= nm_rxq->fl_sidx); } n -= 8; if (fl_pidx == nm_rxq->fl_sidx) { fl_pidx = 0; slot = &ring->slot[0]; } if (++dbinc == 8 && n >= 32) { wmb(); if (starve_fl) nm_rxq->fl_db_saved += dbinc; else { t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(dbinc)); } dbinc = 0; } } MPASS(nm_rxq->fl_pidx == fl_pidx); if (dbinc > 0) { wmb(); if (starve_fl) nm_rxq->fl_db_saved += dbinc; else { t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(dbinc)); } } } return (0); } void cxgbe_nm_attach(struct vi_info *vi) { struct port_info *pi; struct adapter *sc; struct netmap_adapter na; MPASS(vi->nnmrxq > 0); MPASS(vi->ifp != NULL); pi = vi->pi; sc = pi->adapter; bzero(&na, sizeof(na)); na.ifp = vi->ifp; na.na_flags = NAF_BDG_MAYSLEEP; /* Netmap doesn't know about the space reserved for the status page. */ na.num_tx_desc = vi->qsize_txq - sc->params.sge.spg_len / EQ_ESIZE; /* * The freelist's cidx/pidx drives netmap's rx cidx/pidx. So * num_rx_desc is based on the number of buffers that can be held in the * freelist, and not the number of entries in the iq. (These two are * not exactly the same due to the space taken up by the status page). 
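 * For example (hypothetical size), qsize_rxq = 1026 yields
 * num_rx_desc = rounddown(1026, 8) = 1024.
 */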
*/ na.num_rx_desc = rounddown(vi->qsize_rxq, 8); na.nm_txsync = cxgbe_netmap_txsync; na.nm_rxsync = cxgbe_netmap_rxsync; na.nm_register = cxgbe_netmap_reg; na.num_tx_rings = vi->nnmtxq; na.num_rx_rings = vi->nnmrxq; netmap_attach(&na); } void cxgbe_nm_detach(struct vi_info *vi) { MPASS(vi->nnmrxq > 0); MPASS(vi->ifp != NULL); netmap_detach(vi->ifp); } static inline const void * unwrap_nm_fw6_msg(const struct cpl_fw6_msg *cpl) { MPASS(cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL); /* data[0] is RSS header */ return (&cpl->data[1]); } static void handle_nm_sge_egr_update(struct adapter *sc, struct ifnet *ifp, const struct cpl_sge_egr_update *egr) { uint32_t oq; struct sge_nm_txq *nm_txq; oq = be32toh(egr->opcode_qid); MPASS(G_CPL_OPCODE(oq) == CPL_SGE_EGR_UPDATE); nm_txq = (void *)sc->sge.eqmap[G_EGR_QID(oq) - sc->sge.eq_start]; netmap_tx_irq(ifp, nm_txq->nid); } void -t4_nm_intr(void *arg) +service_nm_rxq(struct sge_nm_rxq *nm_rxq) { - struct sge_nm_rxq *nm_rxq = arg; struct vi_info *vi = nm_rxq->vi; struct adapter *sc = vi->pi->adapter; struct ifnet *ifp = vi->ifp; struct netmap_adapter *na = NA(ifp); struct netmap_kring *kring = na->rx_rings[nm_rxq->nid]; struct netmap_ring *ring = kring->ring; struct iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx]; const void *cpl; uint32_t lq; u_int work = 0; uint8_t opcode; uint32_t fl_cidx = atomic_load_acq_32(&nm_rxq->fl_cidx); u_int fl_credits = fl_cidx & 7; u_int ndesc = 0; /* desc processed since last cidx update */ u_int nframes = 0; /* frames processed since last netmap wakeup */ while ((d->rsp.u.type_gen & F_RSPD_GEN) == nm_rxq->iq_gen) { rmb(); lq = be32toh(d->rsp.pldbuflen_qid); opcode = d->rss.opcode; cpl = &d->cpl[0]; switch (G_RSPD_TYPE(d->rsp.u.type_gen)) { case X_RSPD_TYPE_FLBUF: /* fall through */ case X_RSPD_TYPE_CPL: MPASS(opcode < NUM_CPL_CMDS); switch (opcode) { case CPL_FW4_MSG: case CPL_FW6_MSG: cpl = unwrap_nm_fw6_msg(cpl); /* fall through */ case CPL_SGE_EGR_UPDATE: handle_nm_sge_egr_update(sc, ifp, cpl); break; case CPL_RX_PKT: ring->slot[fl_cidx].len = G_RSPD_LEN(lq) - sc->params.sge.fl_pktshift; ring->slot[fl_cidx].flags = 0; nframes++; if (!(lq & F_RSPD_NEWBUF)) { MPASS(black_hole == 2); break; } fl_credits++; if (__predict_false(++fl_cidx == nm_rxq->fl_sidx)) fl_cidx = 0; break; default: panic("%s: unexpected opcode 0x%x on nm_rxq %p", __func__, opcode, nm_rxq); } break; case X_RSPD_TYPE_INTR: /* Not equipped to handle forwarded interrupts. 
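[Editor's note] The while loop in service_nm_rxq() above decides whether a response descriptor is new by comparing its generation bit against the queue's expected value, and flips the expectation whenever cidx wraps, so stale entries from the previous lap are never mistaken for fresh ones. A reduced, compilable sketch of the same technique; all ex_* names are illustrative, not the driver's.

#include <stdbool.h>

#define EX_F_GEN	0x80
struct ex_desc {
	unsigned char	type_gen;	/* type bits plus generation bit */
};
struct ex_iq {
	struct ex_desc	*desc;
	unsigned int	cidx, sidx;	/* consumer index, ring size */
	unsigned char	gen;		/* generation bit we expect next */
};

/* Consume one descriptor iff the hardware has produced it. */
static bool
ex_iq_next(struct ex_iq *iq)
{
	if ((iq->desc[iq->cidx].type_gen & EX_F_GEN) != iq->gen)
		return (false);		/* still owned by hardware */
	/* ... process iq->desc[iq->cidx] here (after a read barrier) ... */
	if (++iq->cidx == iq->sidx) {
		iq->cidx = 0;
		iq->gen ^= EX_F_GEN;	/* next lap uses the other bit */
	}
	return (true);
}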
*/ panic("%s: netmap queue received interrupt for iq %u\n", __func__, lq); default: panic("%s: illegal response type %d on nm_rxq %p", __func__, G_RSPD_TYPE(d->rsp.u.type_gen), nm_rxq); } d++; if (__predict_false(++nm_rxq->iq_cidx == nm_rxq->iq_sidx)) { nm_rxq->iq_cidx = 0; d = &nm_rxq->iq_desc[0]; nm_rxq->iq_gen ^= F_RSPD_GEN; } if (__predict_false(++nframes == rx_nframes) && !black_hole) { atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); netmap_rx_irq(ifp, nm_rxq->nid, &work); nframes = 0; } if (__predict_false(++ndesc == rx_ndesc)) { if (black_hole && fl_credits >= 8) { fl_credits /= 8; IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, nm_rxq->fl_sidx); t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(fl_credits)); fl_credits = fl_cidx & 7; } t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndesc) | V_INGRESSQID(nm_rxq->iq_cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); ndesc = 0; } } atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); if (black_hole) { fl_credits /= 8; IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, nm_rxq->fl_sidx); t4_write_reg(sc, sc->sge_kdoorbell_reg, nm_rxq->fl_db_val | V_PIDX(fl_credits)); } else if (nframes > 0) netmap_rx_irq(ifp, nm_rxq->nid, &work); t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndesc) | V_INGRESSQID((u32)nm_rxq->iq_cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx))); } #endif Index: projects/clang700-import/sys/dev/cxgbe/t4_sge.c =================================================================== --- projects/clang700-import/sys/dev/cxgbe/t4_sge.c (revision 337615) +++ projects/clang700-import/sys/dev/cxgbe/t4_sge.c (revision 337616) @@ -1,5781 +1,5872 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ratelimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_NETMAP #include #include #include #include #include #endif #include "common/common.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_msg.h" #include "t4_l2t.h" #include "t4_mp_ring.h" #ifdef T4_PKT_TIMESTAMP #define RX_COPY_THRESHOLD (MINCLSIZE - 8) #else #define RX_COPY_THRESHOLD MINCLSIZE #endif /* * Ethernet frames are DMA'd at this byte offset into the freelist buffer. * 0-7 are valid values. */ static int fl_pktshift = 0; TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift); /* * Pad ethernet payload up to this boundary. * -1: driver should figure out a good value. * 0: disable padding. * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value. */ int fl_pad = -1; TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad); /* * Status page length. * -1: driver should figure out a good value. * 64 or 128 are the only other valid values. */ static int spg_len = -1; TUNABLE_INT("hw.cxgbe.spg_len", &spg_len); /* * Congestion drops. * -1: no congestion feedback (not recommended). * 0: backpressure the channel instead of dropping packets right away. * 1: no backpressure, drop packets for the congested queue immediately. */ static int cong_drop = 0; TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop); /* * Deliver multiple frames in the same free list buffer if they fit. * -1: let the driver decide whether to enable buffer packing or not. * 0: disable buffer packing. * 1: enable buffer packing. */ static int buffer_packing = -1; TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing); /* * Start next frame in a packed buffer at this boundary. * -1: driver should figure out a good value. * T4: driver will ignore this and use the same value as fl_pad above. * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value. */ static int fl_pack = -1; TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack); /* * Allow the driver to create mbuf(s) in a cluster allocated for rx. * 0: never; always allocate mbufs from the zone_mbuf UMA zone. * 1: ok to create mbuf(s) within a cluster if there is room. */ static int allow_mbufs_in_cluster = 1; TUNABLE_INT("hw.cxgbe.allow_mbufs_in_cluster", &allow_mbufs_in_cluster); /* * Largest rx cluster size that the driver is allowed to allocate. */ static int largest_rx_cluster = MJUM16BYTES; TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster); /* * Size of cluster allocation that's most likely to succeed. The driver will * fall back to this size if it fails to allocate clusters larger than this. */ static int safest_rx_cluster = PAGE_SIZE; TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster); #ifdef RATELIMIT /* * Knob to control TCP timestamp rewriting, and the granularity of the tick used * for rewriting. -1 and 0-3 are all valid values. * -1: hardware should leave the TCP timestamps alone. * 0: 1ms * 1: 100us * 2: 10us * 3: 1us */ static int tsclk = -1; TUNABLE_INT("hw.cxgbe.tsclk", &tsclk); static int eo_max_backlog = 1024 * 1024; TUNABLE_INT("hw.cxgbe.eo_max_backlog", &eo_max_backlog); #endif /* * The interrupt holdoff timers are multiplied by this value on T6+. * 1 and 3-17 (both inclusive) are legal values. 
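[Editor's note] All of these knobs are loader tunables (TUNABLE_INT), so they are read from the kernel environment when the module initializes; on FreeBSD that normally means setting them in /boot/loader.conf before boot. For example (values purely illustrative, not recommendations):

# /boot/loader.conf (illustrative values, not recommendations)
hw.cxgbe.fl_pktshift="2"
hw.cxgbe.largest_rx_cluster="9216"
hw.cxgbe.tscale="1"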
*/ static int tscale = 1; TUNABLE_INT("hw.cxgbe.tscale", &tscale); /* * Number of LRO entries in the lro_ctrl structure per rx queue. */ static int lro_entries = TCP_LRO_ENTRIES; TUNABLE_INT("hw.cxgbe.lro_entries", &lro_entries); /* * This enables presorting of frames before they're fed into tcp_lro_rx. */ static int lro_mbufs = 0; TUNABLE_INT("hw.cxgbe.lro_mbufs", &lro_mbufs); struct txpkts { u_int wr_type; /* type 0 or type 1 */ u_int npkt; /* # of packets in this work request */ u_int plen; /* total payload (sum of all packets) */ u_int len16; /* # of 16B pieces used by this work request */ }; /* A packet's SGL. This + m_pkthdr has all info needed for tx */ struct sgl { struct sglist sg; struct sglist_seg seg[TX_SGL_SEGS]; }; static int service_iq(struct sge_iq *, int); +static int service_iq_fl(struct sge_iq *, int); static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t); static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *); static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int); static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *); static inline void init_eq(struct adapter *, struct sge_eq *, int, int, uint8_t, uint16_t, char *); static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *, bus_addr_t *, void **); static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, void *); static int alloc_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *, int, int); static int free_iq_fl(struct vi_info *, struct sge_iq *, struct sge_fl *); static void add_iq_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, struct sge_iq *); static void add_fl_sysctls(struct adapter *, struct sysctl_ctx_list *, struct sysctl_oid *, struct sge_fl *); static int alloc_fwq(struct adapter *); static int free_fwq(struct adapter *); static int alloc_mgmtq(struct adapter *); static int free_mgmtq(struct adapter *); static int alloc_rxq(struct vi_info *, struct sge_rxq *, int, int, struct sysctl_oid *); static int free_rxq(struct vi_info *, struct sge_rxq *); #ifdef TCP_OFFLOAD static int alloc_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *, int, int, struct sysctl_oid *); static int free_ofld_rxq(struct vi_info *, struct sge_ofld_rxq *); #endif #ifdef DEV_NETMAP static int alloc_nm_rxq(struct vi_info *, struct sge_nm_rxq *, int, int, struct sysctl_oid *); static int free_nm_rxq(struct vi_info *, struct sge_nm_rxq *); static int alloc_nm_txq(struct vi_info *, struct sge_nm_txq *, int, int, struct sysctl_oid *); static int free_nm_txq(struct vi_info *, struct sge_nm_txq *); #endif static int ctrl_eq_alloc(struct adapter *, struct sge_eq *); static int eth_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); #if defined(TCP_OFFLOAD) || defined(RATELIMIT) static int ofld_eq_alloc(struct adapter *, struct vi_info *, struct sge_eq *); #endif static int alloc_eq(struct adapter *, struct vi_info *, struct sge_eq *); static int free_eq(struct adapter *, struct sge_eq *); static int alloc_wrq(struct adapter *, struct vi_info *, struct sge_wrq *, struct sysctl_oid *); static int free_wrq(struct adapter *, struct sge_wrq *); static int alloc_txq(struct vi_info *, struct sge_txq *, int, struct sysctl_oid *); static int free_txq(struct vi_info *, struct sge_txq *); static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int); static inline void ring_fl_db(struct adapter *, struct sge_fl *); static int refill_fl(struct adapter *, struct sge_fl *, int); static 
void refill_sfl(void *); static int alloc_fl_sdesc(struct sge_fl *); static void free_fl_sdesc(struct adapter *, struct sge_fl *); static void find_best_refill_source(struct adapter *, struct sge_fl *, int); static void find_safe_refill_source(struct adapter *, struct sge_fl *); static void add_fl_to_sfl(struct adapter *, struct sge_fl *); static inline void get_pkt_gl(struct mbuf *, struct sglist *); static inline u_int txpkt_len16(u_int, u_int); static inline u_int txpkt_vm_len16(u_int, u_int); static inline u_int txpkts0_len16(u_int); static inline u_int txpkts1_len16(void); static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *, struct mbuf *, u_int); static u_int write_txpkt_vm_wr(struct adapter *, struct sge_txq *, struct fw_eth_tx_pkt_vm_wr *, struct mbuf *, u_int); static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int); static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int); static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *, struct mbuf *, const struct txpkts *, u_int); static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int); static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int); static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int); static inline uint16_t read_hw_cidx(struct sge_eq *); static inline u_int reclaimable_tx_desc(struct sge_eq *); static inline u_int total_available_tx_desc(struct sge_eq *); static u_int reclaim_tx_descs(struct sge_txq *, u_int); static void tx_reclaim(void *, int); static __be64 get_flit(struct sglist_seg *, int, int); static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *, struct mbuf *); static int handle_fw_msg(struct sge_iq *, const struct rss_header *, struct mbuf *); static int t4_handle_wrerr_rpl(struct adapter *, const __be64 *); static void wrq_tx_drain(void *, int); static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *); static int sysctl_uint16(SYSCTL_HANDLER_ARGS); static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); #ifdef RATELIMIT static inline u_int txpkt_eo_len16(u_int, u_int, u_int); static int ethofld_fw4_ack(struct sge_iq *, const struct rss_header *, struct mbuf *); #endif static counter_u64_t extfree_refs; static counter_u64_t extfree_rels; an_handler_t t4_an_handler; fw_msg_handler_t t4_fw_msg_handler[NUM_FW6_TYPES]; cpl_handler_t t4_cpl_handler[NUM_CPL_CMDS]; cpl_handler_t set_tcb_rpl_handlers[NUM_CPL_COOKIES]; cpl_handler_t l2t_write_rpl_handlers[NUM_CPL_COOKIES]; cpl_handler_t act_open_rpl_handlers[NUM_CPL_COOKIES]; cpl_handler_t abort_rpl_rss_handlers[NUM_CPL_COOKIES]; cpl_handler_t fw4_ack_handlers[NUM_CPL_COOKIES]; void t4_register_an_handler(an_handler_t h) { uintptr_t *loc; MPASS(h == NULL || t4_an_handler == NULL); loc = (uintptr_t *)&t4_an_handler; atomic_store_rel_ptr(loc, (uintptr_t)h); } void t4_register_fw_msg_handler(int type, fw_msg_handler_t h) { uintptr_t *loc; MPASS(type < nitems(t4_fw_msg_handler)); MPASS(h == NULL || t4_fw_msg_handler[type] == NULL); /* * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL * handler dispatch table. Reject any attempt to install a handler for * this subtype. 
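[Editor's note] The registration helpers above publish handler pointers with atomic_store_rel_ptr() so a dispatcher racing with registration sees either NULL or a fully visible handler, never a torn pointer. A compact user-space rendering of the same pattern with C11 atomics; the ex_* names are hypothetical.

#include <stdatomic.h>
#include <stddef.h>

typedef int (*ex_handler_t)(int opcode, const void *payload);

#define EX_NUM_OPCODES	256
static _Atomic(ex_handler_t) ex_handlers[EX_NUM_OPCODES];

/* Publish a handler; release pairs with the acquire in ex_dispatch(). */
static void
ex_register_handler(int opcode, ex_handler_t h)
{
	atomic_store_explicit(&ex_handlers[opcode], h, memory_order_release);
}

static int
ex_dispatch(int opcode, const void *payload)
{
	ex_handler_t h;

	h = atomic_load_explicit(&ex_handlers[opcode], memory_order_acquire);
	return (h != NULL ? h(opcode, payload) : -1);
}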
*/ MPASS(type != FW_TYPE_RSSCPL); MPASS(type != FW6_TYPE_RSSCPL); loc = (uintptr_t *)&t4_fw_msg_handler[type]; atomic_store_rel_ptr(loc, (uintptr_t)h); } void t4_register_cpl_handler(int opcode, cpl_handler_t h) { uintptr_t *loc; MPASS(opcode < nitems(t4_cpl_handler)); MPASS(h == NULL || t4_cpl_handler[opcode] == NULL); loc = (uintptr_t *)&t4_cpl_handler[opcode]; atomic_store_rel_ptr(loc, (uintptr_t)h); } static int set_tcb_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); u_int tid; int cookie; MPASS(m == NULL); tid = GET_TID(cpl); if (is_hpftid(iq->adapter, tid) || is_ftid(iq->adapter, tid)) { /* * The return code for filter-write is put in the CPL cookie so * we have to rely on the hardware tid (is_ftid) to determine * that this is a response to a filter. */ cookie = CPL_COOKIE_FILTER; } else { cookie = G_COOKIE(cpl->cookie); } MPASS(cookie > CPL_COOKIE_RESERVED); MPASS(cookie < nitems(set_tcb_rpl_handlers)); return (set_tcb_rpl_handlers[cookie](iq, rss, m)); } static int l2t_write_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); unsigned int cookie; MPASS(m == NULL); cookie = GET_TID(rpl) & F_SYNC_WR ? CPL_COOKIE_TOM : CPL_COOKIE_FILTER; return (l2t_write_rpl_handlers[cookie](iq, rss, m)); } static int act_open_rpl_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); u_int cookie = G_TID_COOKIE(G_AOPEN_ATID(be32toh(cpl->atid_status))); MPASS(m == NULL); MPASS(cookie != CPL_COOKIE_RESERVED); return (act_open_rpl_handlers[cookie](iq, rss, m)); } static int abort_rpl_rss_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; u_int cookie; MPASS(m == NULL); if (is_hashfilter(sc)) cookie = CPL_COOKIE_HASHFILTER; else cookie = CPL_COOKIE_TOM; return (abort_rpl_rss_handlers[cookie](iq, rss, m)); } static int fw4_ack_handler(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); u_int cookie; MPASS(m == NULL); if (is_etid(sc, tid)) cookie = CPL_COOKIE_ETHOFLD; else cookie = CPL_COOKIE_TOM; return (fw4_ack_handlers[cookie](iq, rss, m)); } static void t4_init_shared_cpl_handlers(void) { t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl_handler); t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl_handler); t4_register_cpl_handler(CPL_ACT_OPEN_RPL, act_open_rpl_handler); t4_register_cpl_handler(CPL_ABORT_RPL_RSS, abort_rpl_rss_handler); t4_register_cpl_handler(CPL_FW4_ACK, fw4_ack_handler); } void t4_register_shared_cpl_handler(int opcode, cpl_handler_t h, int cookie) { uintptr_t *loc; MPASS(opcode < nitems(t4_cpl_handler)); MPASS(cookie > CPL_COOKIE_RESERVED); MPASS(cookie < NUM_CPL_COOKIES); MPASS(t4_cpl_handler[opcode] != NULL); switch (opcode) { case CPL_SET_TCB_RPL: loc = (uintptr_t *)&set_tcb_rpl_handlers[cookie]; break; case CPL_L2T_WRITE_RPL: loc = (uintptr_t *)&l2t_write_rpl_handlers[cookie]; break; case CPL_ACT_OPEN_RPL: loc = (uintptr_t *)&act_open_rpl_handlers[cookie]; break; case CPL_ABORT_RPL_RSS: loc = (uintptr_t *)&abort_rpl_rss_handlers[cookie]; break; case CPL_FW4_ACK: loc = (uintptr_t *)&fw4_ack_handlers[cookie]; break; default: MPASS(0); return; } MPASS(h == NULL || *loc == 
(uintptr_t)NULL); atomic_store_rel_ptr(loc, (uintptr_t)h); } /* * Called on MOD_LOAD. Validates and calculates the SGE tunables. */ void t4_sge_modload(void) { if (fl_pktshift < 0 || fl_pktshift > 7) { printf("Invalid hw.cxgbe.fl_pktshift value (%d)," " using 0 instead.\n", fl_pktshift); fl_pktshift = 0; } if (spg_len != 64 && spg_len != 128) { int len; #if defined(__i386__) || defined(__amd64__) len = cpu_clflush_line_size > 64 ? 128 : 64; #else len = 64; #endif if (spg_len != -1) { printf("Invalid hw.cxgbe.spg_len value (%d)," " using %d instead.\n", spg_len, len); } spg_len = len; } if (cong_drop < -1 || cong_drop > 1) { printf("Invalid hw.cxgbe.cong_drop value (%d)," " using 0 instead.\n", cong_drop); cong_drop = 0; } if (tscale != 1 && (tscale < 3 || tscale > 17)) { printf("Invalid hw.cxgbe.tscale value (%d)," " using 1 instead.\n", tscale); tscale = 1; } extfree_refs = counter_u64_alloc(M_WAITOK); extfree_rels = counter_u64_alloc(M_WAITOK); counter_u64_zero(extfree_refs); counter_u64_zero(extfree_rels); t4_init_shared_cpl_handlers(); t4_register_cpl_handler(CPL_FW4_MSG, handle_fw_msg); t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg); t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update); t4_register_cpl_handler(CPL_RX_PKT, t4_eth_rx); #ifdef RATELIMIT t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack, CPL_COOKIE_ETHOFLD); #endif t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl); t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl); } void t4_sge_modunload(void) { counter_u64_free(extfree_refs); counter_u64_free(extfree_rels); } uint64_t t4_sge_extfree_refs(void) { uint64_t refs, rels; rels = counter_u64_fetch(extfree_rels); refs = counter_u64_fetch(extfree_refs); return (refs - rels); } static inline void setup_pad_and_pack_boundaries(struct adapter *sc) { uint32_t v, m; int pad, pack, pad_shift; pad_shift = chip_id(sc) > CHELSIO_T5 ? X_T6_INGPADBOUNDARY_SHIFT : X_INGPADBOUNDARY_SHIFT; pad = fl_pad; if (fl_pad < (1 << pad_shift) || fl_pad > (1 << (pad_shift + M_INGPADBOUNDARY)) || !powerof2(fl_pad)) { /* * If there is any chance that we might use buffer packing and * the chip is a T4, then pick 64 as the pad/pack boundary. Set * it to the minimum allowed in all other cases. */ pad = is_t4(sc) && buffer_packing ? 64 : 1 << pad_shift; /* * For fl_pad = 0 we'll still write a reasonable value to the * register but all the freelists will opt out of padding. * We'll complain here only if the user tried to set it to a * value greater than 0 that was invalid. */ if (fl_pad > 0) { device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value" " (%d), using %d instead.\n", fl_pad, pad); } } m = V_INGPADBOUNDARY(M_INGPADBOUNDARY); v = V_INGPADBOUNDARY(ilog2(pad) - pad_shift); t4_set_reg_field(sc, A_SGE_CONTROL, m, v); if (is_t4(sc)) { if (fl_pack != -1 && fl_pack != pad) { /* Complain but carry on. 
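[Editor's note] setup_pad_and_pack_boundaries() above encodes each boundary into its register field as a log2 value relative to a chip-specific base shift, e.g. V_INGPADBOUNDARY(ilog2(pad) - pad_shift). A tiny sketch of that encode step; the pad_shift value here is illustrative and the kernel supplies ilog2() itself.

#include <assert.h>

/* ilog2() of a power of two; a stand-in for the kernel's version. */
static int
ex_ilog2(unsigned int x)
{
	int n = 0;

	while (x > 1) {
		x >>= 1;
		n++;
	}
	return (n);
}

int
main(void)
{
	const int pad_shift = 5;	/* illustrative, chip-dependent */

	/* A 64-byte pad boundary encodes as log2(64) - 5 = 1. */
	assert(ex_ilog2(64) - pad_shift == 1);
	return (0);
}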
*/ device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored," " using %d instead.\n", fl_pack, pad); } return; } pack = fl_pack; if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 || !powerof2(fl_pack)) { pack = max(sc->params.pci.mps, CACHE_LINE_SIZE); MPASS(powerof2(pack)); if (pack < 16) pack = 16; if (pack == 32) pack = 64; if (pack > 4096) pack = 4096; if (fl_pack != -1) { device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value" " (%d), using %d instead.\n", fl_pack, pack); } } m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY); if (pack == 16) v = V_INGPACKBOUNDARY(0); else v = V_INGPACKBOUNDARY(ilog2(pack) - 5); MPASS(!is_t4(sc)); /* T4 doesn't have SGE_CONTROL2 */ t4_set_reg_field(sc, A_SGE_CONTROL2, m, v); } /* * adap->params.vpd.cclk must be set up before this is called. */ void t4_tweak_chip_settings(struct adapter *sc) { int i; uint32_t v, m; int intr_timer[SGE_NTIMERS] = {1, 5, 10, 50, 100, 200}; int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); static int sge_flbuf_sizes[] = { MCLBYTES, #if MJUMPAGESIZE != MCLBYTES MJUMPAGESIZE, MJUMPAGESIZE - CL_METADATA_SIZE, MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE, #endif MJUM9BYTES, MJUM16BYTES, MCLBYTES - MSIZE - CL_METADATA_SIZE, MJUM9BYTES - CL_METADATA_SIZE, MJUM16BYTES - CL_METADATA_SIZE, }; KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE; v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE | V_EGRSTATUSPAGESIZE(spg_len == 128); t4_set_reg_field(sc, A_SGE_CONTROL, m, v); setup_pad_and_pack_boundaries(sc); v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF3(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF4(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF5(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF6(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES, ("%s: hw buffer size table too big", __func__)); for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) { t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i), sge_flbuf_sizes[i]); } v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) | V_THRESHOLD_2(intr_pktcount[2]) | V_THRESHOLD_3(intr_pktcount[3]); t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD, v); KASSERT(intr_timer[0] <= timer_max, ("%s: not a single usable timer (%d, %d)", __func__, intr_timer[0], timer_max)); for (i = 1; i < nitems(intr_timer); i++) { KASSERT(intr_timer[i] >= intr_timer[i - 1], ("%s: timers not listed in increasing order (%d)", __func__, i)); while (intr_timer[i] > timer_max) { if (i == nitems(intr_timer) - 1) { intr_timer[i] = timer_max; break; } intr_timer[i] += intr_timer[i - 1]; intr_timer[i] /= 2; } } v = V_TIMERVALUE0(us_to_core_ticks(sc, intr_timer[0])) | V_TIMERVALUE1(us_to_core_ticks(sc, intr_timer[1])); t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1, v); v = V_TIMERVALUE2(us_to_core_ticks(sc, intr_timer[2])) | V_TIMERVALUE3(us_to_core_ticks(sc, intr_timer[3])); t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3, v); v = V_TIMERVALUE4(us_to_core_ticks(sc, intr_timer[4])) | V_TIMERVALUE5(us_to_core_ticks(sc, intr_timer[5])); t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5, v); if (chip_id(sc) >= CHELSIO_T6) { m = V_TSCALE(M_TSCALE); if (tscale == 1) v = 0; else v = V_TSCALE(tscale - 2); t4_set_reg_field(sc, 
A_SGE_ITP_CONTROL, m, v); if (sc->debug_flags & DF_DISABLE_TCB_CACHE) { m = V_RDTHRESHOLD(M_RDTHRESHOLD) | F_WRTHRTHRESHEN | V_WRTHRTHRESH(M_WRTHRTHRESH); t4_tp_pio_read(sc, &v, 1, A_TP_CMM_CONFIG, 1); v &= ~m; v |= V_RDTHRESHOLD(1) | F_WRTHRTHRESHEN | V_WRTHRTHRESH(16); t4_tp_pio_write(sc, &v, 1, A_TP_CMM_CONFIG, 1); } } /* 4K, 16K, 64K, 256K DDP "page sizes" for TDDP */ v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, v); /* * 4K, 8K, 16K, 64K DDP "page sizes" for iSCSI DDP. These have been * chosen with MAXPHYS = 128K in mind. The largest DDP buffer that we * may have to deal with is MAXPHYS + 1 page. */ v = V_HPZ0(0) | V_HPZ1(1) | V_HPZ2(2) | V_HPZ3(4); t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, v); /* We use multiple DDP page sizes both in plain-TOE and ISCSI modes. */ m = v = F_TDDPTAGTCB | F_ISCSITAGTCB; t4_set_reg_field(sc, A_ULP_RX_CTL, m, v); m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; t4_set_reg_field(sc, A_TP_PARA_REG5, m, v); } /* * SGE wants the buffer to be at least 64B and then a multiple of 16. If * padding is in use, the buffer's start and end need to be aligned to the pad * boundary as well. We'll just make sure that the size is a multiple of the * boundary here, it is up to the buffer allocation code to make sure the start * of the buffer is aligned as well. */ static inline int hwsz_ok(struct adapter *sc, int hwsz) { int mask = fl_pad ? sc->params.sge.pad_boundary - 1 : 16 - 1; return (hwsz >= 64 && (hwsz & mask) == 0); } /* * XXX: driver really should be able to deal with unexpected settings. */ int t4_read_chip_settings(struct adapter *sc) { struct sge *s = &sc->sge; struct sge_params *sp = &sc->params.sge; int i, j, n, rc = 0; uint32_t m, v, r; uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); static int sw_buf_sizes[] = { /* Sorted by size */ MCLBYTES, #if MJUMPAGESIZE != MCLBYTES MJUMPAGESIZE, #endif MJUM9BYTES, MJUM16BYTES }; struct sw_zone_info *swz, *safe_swz; struct hw_buf_info *hwb; m = F_RXPKTCPLMODE; v = F_RXPKTCPLMODE; r = sc->params.sge.sge_control; if ((r & m) != v) { device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r); rc = EINVAL; } /* * If this changes then every single use of PAGE_SHIFT in the driver * needs to be carefully reviewed for PAGE_SHIFT vs sp->page_shift. */ if (sp->page_shift != PAGE_SHIFT) { device_printf(sc->dev, "invalid SGE_HOST_PAGE_SIZE(0x%x)\n", r); rc = EINVAL; } /* Filter out unusable hw buffer sizes entirely (mark with -2). */ hwb = &s->hw_buf_info[0]; for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) { r = sc->params.sge.sge_fl_buffer_size[i]; hwb->size = r; hwb->zidx = hwsz_ok(sc, r) ? -1 : -2; hwb->next = -1; } /* * Create a sorted list in decreasing order of hw buffer sizes (and so * increasing order of spare area) for each software zone. * * If padding is enabled then the start and end of the buffer must align * to the pad boundary; if packing is enabled then they must align with * the pack boundary as well. Allocations from the cluster zones are * aligned to min(size, 4K), so the buffer starts at that alignment and * ends at hwb->size alignment. If mbuf inlining is allowed the * starting alignment will be reduced to MSIZE and the driver will * exercise appropriate caution when deciding on the best buffer layout * to use. 
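[Editor's note] The sorted list described above exists so the driver can tell, per software zone, whether the spare room left after the hardware buffer can hold cluster metadata (and possibly inline mbufs). A toy version of that spare-area test; EX_CL_METADATA_SIZE is a made-up value standing in for CL_METADATA_SIZE.

#include <stdbool.h>
#include <stdio.h>

#define EX_CL_METADATA_SIZE	32	/* made-up; see CL_METADATA_SIZE */

/* Does a cluster of clsz leave room for metadata after a hwsz buffer? */
static bool
ex_spare_fits_metadata(int clsz, int hwsz)
{
	return (clsz - hwsz >= EX_CL_METADATA_SIZE);
}

int
main(void)
{
	printf("4096-byte cluster, 4032-byte buffer: %d\n",
	    ex_spare_fits_metadata(4096, 4032));	/* 1: spare is 64 */
	printf("4096-byte cluster, 4096-byte buffer: %d\n",
	    ex_spare_fits_metadata(4096, 4096));	/* 0: no spare */
	return (0);
}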
*/ n = 0; /* no usable buffer size to begin with */ swz = &s->sw_zone_info[0]; safe_swz = NULL; for (i = 0; i < SW_ZONE_SIZES; i++, swz++) { int8_t head = -1, tail = -1; swz->size = sw_buf_sizes[i]; swz->zone = m_getzone(swz->size); swz->type = m_gettype(swz->size); if (swz->size < PAGE_SIZE) { MPASS(powerof2(swz->size)); if (fl_pad && (swz->size % sp->pad_boundary != 0)) continue; } if (swz->size == safest_rx_cluster) safe_swz = swz; hwb = &s->hw_buf_info[0]; for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) { if (hwb->zidx != -1 || hwb->size > swz->size) continue; #ifdef INVARIANTS if (fl_pad) MPASS(hwb->size % sp->pad_boundary == 0); #endif hwb->zidx = i; if (head == -1) head = tail = j; else if (hwb->size < s->hw_buf_info[tail].size) { s->hw_buf_info[tail].next = j; tail = j; } else { int8_t *cur; struct hw_buf_info *t; for (cur = &head; *cur != -1; cur = &t->next) { t = &s->hw_buf_info[*cur]; if (hwb->size == t->size) { hwb->zidx = -2; break; } if (hwb->size > t->size) { hwb->next = *cur; *cur = j; break; } } } } swz->head_hwidx = head; swz->tail_hwidx = tail; if (tail != -1) { n++; if (swz->size - s->hw_buf_info[tail].size >= CL_METADATA_SIZE) sc->flags |= BUF_PACKING_OK; } } if (n == 0) { device_printf(sc->dev, "no usable SGE FL buffer size.\n"); rc = EINVAL; } s->safe_hwidx1 = -1; s->safe_hwidx2 = -1; if (safe_swz != NULL) { s->safe_hwidx1 = safe_swz->head_hwidx; for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) { int spare; hwb = &s->hw_buf_info[i]; #ifdef INVARIANTS if (fl_pad) MPASS(hwb->size % sp->pad_boundary == 0); #endif spare = safe_swz->size - hwb->size; if (spare >= CL_METADATA_SIZE) { s->safe_hwidx2 = i; break; } } } if (sc->flags & IS_VF) return (0); v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ); if (r != v) { device_printf(sc->dev, "invalid ULP_RX_TDDP_PSZ(0x%x)\n", r); rc = EINVAL; } m = v = F_TDDPTAGTCB; r = t4_read_reg(sc, A_ULP_RX_CTL); if ((r & m) != v) { device_printf(sc->dev, "invalid ULP_RX_CTL(0x%x)\n", r); rc = EINVAL; } m = V_INDICATESIZE(M_INDICATESIZE) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; v = V_INDICATESIZE(indsz) | F_REARMDDPOFFSET | F_RESETDDPOFFSET; r = t4_read_reg(sc, A_TP_PARA_REG5); if ((r & m) != v) { device_printf(sc->dev, "invalid TP_PARA_REG5(0x%x)\n", r); rc = EINVAL; } t4_init_tp_params(sc, 1); t4_read_mtu_tbl(sc, sc->params.mtus, NULL); t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd, sc->params.b_wnd); return (rc); } int t4_create_dma_tag(struct adapter *sc) { int rc; rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE, BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->dmat); if (rc != 0) { device_printf(sc->dev, "failed to create main DMA tag: %d\n", rc); } return (rc); } void t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *children) { struct sge_params *sp = &sc->params.sge; SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A", "freelist buffer sizes"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, NULL, sp->fl_pktshift, "payload DMA offset in rx buffer (bytes)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD, NULL, sp->pad_boundary, "payload pad boundary (bytes)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD, NULL, sp->spg_len, "status page size (bytes)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD, NULL, 
cong_drop, "congestion drop setting"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, NULL, sp->pack_boundary, "payload pack boundary (bytes)"); } int t4_destroy_dma_tag(struct adapter *sc) { if (sc->dmat) bus_dma_tag_destroy(sc->dmat); return (0); } /* * Allocate and initialize the firmware event queue and the management queue. * * Returns errno on failure. Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. */ int t4_setup_adapter_queues(struct adapter *sc) { int rc; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); sysctl_ctx_init(&sc->ctx); sc->flags |= ADAP_SYSCTL_CTX; /* * Firmware event queue */ rc = alloc_fwq(sc); if (rc != 0) return (rc); /* * Management queue. This is just a control queue that uses the fwq as * its associated iq. */ if (!(sc->flags & IS_VF)) rc = alloc_mgmtq(sc); return (rc); } /* * Idempotent */ int t4_teardown_adapter_queues(struct adapter *sc) { ADAPTER_LOCK_ASSERT_NOTOWNED(sc); /* Do this before freeing the queue */ if (sc->flags & ADAP_SYSCTL_CTX) { sysctl_ctx_free(&sc->ctx); sc->flags &= ~ADAP_SYSCTL_CTX; } free_mgmtq(sc); free_fwq(sc); return (0); } /* Maximum payload that can be delivered with a single iq descriptor */ static inline int mtu_to_max_payload(struct adapter *sc, int mtu, const int toe) { int payload; #ifdef TCP_OFFLOAD if (toe) { int rxcs = G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)); /* Note that COP can set rx_coalesce on/off per connection. */ payload = max(mtu, rxcs); } else { #endif /* large enough even when hw VLAN extraction is disabled */ payload = sc->params.sge.fl_pktshift + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + mtu; #ifdef TCP_OFFLOAD } #endif return (payload); } int t4_setup_vi_queues(struct vi_info *vi) { int rc = 0, i, intr_idx, iqidx; struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *ctrlq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif #if defined(TCP_OFFLOAD) || defined(RATELIMIT) struct sge_wrq *ofld_txq; #endif #ifdef DEV_NETMAP int saved_idx; struct sge_nm_rxq *nm_rxq; struct sge_nm_txq *nm_txq; #endif char name[16]; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; struct sysctl_oid *oid = device_get_sysctl_tree(vi->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); int maxp, mtu = ifp->if_mtu; /* Interrupt vector to start from (when using multiple vectors) */ intr_idx = vi->first_intr; #ifdef DEV_NETMAP saved_idx = intr_idx; if (ifp->if_capabilities & IFCAP_NETMAP) { /* netmap is supported with direct interrupts only. */ MPASS(!forwarding_intr_to_fwq(sc)); /* * We don't have buffers to back the netmap rx queues * right now so we create the queues in a way that * doesn't set off any congestion signal in the chip. */ oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_rxq", CTLFLAG_RD, NULL, "rx queues"); for_each_nm_rxq(vi, i, nm_rxq) { rc = alloc_nm_rxq(vi, nm_rxq, intr_idx, i, oid); if (rc != 0) goto done; intr_idx++; } oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "nm_txq", CTLFLAG_RD, NULL, "tx queues"); for_each_nm_txq(vi, i, nm_txq) { iqidx = vi->first_nm_rxq + (i % vi->nnmrxq); rc = alloc_nm_txq(vi, nm_txq, iqidx, i, oid); if (rc != 0) goto done; } } /* Normal rx queues and netmap rx queues share the same interrupts. */ intr_idx = saved_idx; #endif /* * Allocate rx queues first because a default iqid is required when * creating a tx queue. 
*/ maxp = mtu_to_max_payload(sc, mtu, 0); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD, NULL, "rx queues"); for_each_rxq(vi, i, rxq) { init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq); snprintf(name, sizeof(name), "%s rxq%d-fl", device_get_nameunit(vi->dev), i); init_fl(sc, &rxq->fl, vi->qsize_rxq / 8, maxp, name); rc = alloc_rxq(vi, rxq, forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid); if (rc != 0) goto done; intr_idx++; } #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) intr_idx = saved_idx + max(vi->nrxq, vi->nnmrxq); #endif #ifdef TCP_OFFLOAD maxp = mtu_to_max_payload(sc, mtu, 1); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_rxq", CTLFLAG_RD, NULL, "rx queues for offloaded TCP connections"); for_each_ofld_rxq(vi, i, ofld_rxq) { init_iq(&ofld_rxq->iq, sc, vi->ofld_tmr_idx, vi->ofld_pktc_idx, vi->qsize_rxq); snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", device_get_nameunit(vi->dev), i); init_fl(sc, &ofld_rxq->fl, vi->qsize_rxq / 8, maxp, name); rc = alloc_ofld_rxq(vi, ofld_rxq, forwarding_intr_to_fwq(sc) ? -1 : intr_idx, i, oid); if (rc != 0) goto done; intr_idx++; } #endif /* * Now the tx queues. */ oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "txq", CTLFLAG_RD, NULL, "tx queues"); for_each_txq(vi, i, txq) { iqidx = vi->first_rxq + (i % vi->nrxq); snprintf(name, sizeof(name), "%s txq%d", device_get_nameunit(vi->dev), i); init_eq(sc, &txq->eq, EQ_ETH, vi->qsize_txq, pi->tx_chan, sc->sge.rxq[iqidx].iq.cntxt_id, name); rc = alloc_txq(vi, txq, i, oid); if (rc != 0) goto done; } #if defined(TCP_OFFLOAD) || defined(RATELIMIT) oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ofld_txq", CTLFLAG_RD, NULL, "tx queues for TOE/ETHOFLD"); for_each_ofld_txq(vi, i, ofld_txq) { struct sysctl_oid *oid2; snprintf(name, sizeof(name), "%s ofld_txq%d", device_get_nameunit(vi->dev), i); #ifdef TCP_OFFLOAD iqidx = vi->first_ofld_rxq + (i % vi->nofldrxq); init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, sc->sge.ofld_rxq[iqidx].iq.cntxt_id, name); #else iqidx = vi->first_rxq + (i % vi->nrxq); init_eq(sc, &ofld_txq->eq, EQ_OFLD, vi->qsize_txq, pi->tx_chan, sc->sge.rxq[iqidx].iq.cntxt_id, name); #endif snprintf(name, sizeof(name), "%d", i); oid2 = SYSCTL_ADD_NODE(&vi->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, "offload tx queue"); rc = alloc_wrq(sc, vi, ofld_txq, oid2); if (rc != 0) goto done; } #endif /* * Finally, the control queue. 
*/ if (!IS_MAIN_VI(vi) || sc->flags & IS_VF) goto done; oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, NULL, "ctrl queue"); ctrlq = &sc->sge.ctrlq[pi->port_id]; snprintf(name, sizeof(name), "%s ctrlq", device_get_nameunit(vi->dev)); init_eq(sc, &ctrlq->eq, EQ_CTRL, CTRL_EQ_QSIZE, pi->tx_chan, sc->sge.rxq[vi->first_rxq].iq.cntxt_id, name); rc = alloc_wrq(sc, vi, ctrlq, oid); done: if (rc) t4_teardown_vi_queues(vi); return (rc); } /* * Idempotent */ int t4_teardown_vi_queues(struct vi_info *vi) { int i; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_rxq *rxq; struct sge_txq *txq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif #if defined(TCP_OFFLOAD) || defined(RATELIMIT) struct sge_wrq *ofld_txq; #endif #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; struct sge_nm_txq *nm_txq; #endif /* Do this before freeing the queues */ if (vi->flags & VI_SYSCTL_CTX) { sysctl_ctx_free(&vi->ctx); vi->flags &= ~VI_SYSCTL_CTX; } #ifdef DEV_NETMAP if (vi->ifp->if_capabilities & IFCAP_NETMAP) { for_each_nm_txq(vi, i, nm_txq) { free_nm_txq(vi, nm_txq); } for_each_nm_rxq(vi, i, nm_rxq) { free_nm_rxq(vi, nm_rxq); } } #endif /* * Take down all the tx queues first, as they reference the rx queues * (for egress updates, etc.). */ if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF)) free_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { free_txq(vi, txq); } #if defined(TCP_OFFLOAD) || defined(RATELIMIT) for_each_ofld_txq(vi, i, ofld_txq) { free_wrq(sc, ofld_txq); } #endif /* * Then take down the rx queues. */ for_each_rxq(vi, i, rxq) { free_rxq(vi, rxq); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { free_ofld_rxq(vi, ofld_rxq); } #endif return (0); } /* - * Deals with errors and the firmware event queue. All data rx queues forward - * their interrupt to the firmware event queue. + * Interrupt handler when the driver is using only 1 interrupt. This is a very + * unusual scenario. + * + * a) Deals with errors, if any. + * b) Services firmware event queue, which is taking interrupts for all other + * queues. */ void t4_intr_all(void *arg) { struct adapter *sc = arg; struct sge_iq *fwq = &sc->sge.fwq; + MPASS(sc->intr_count == 1); + t4_intr_err(arg); - if (atomic_cmpset_int(&fwq->state, IQS_IDLE, IQS_BUSY)) { - service_iq(fwq, 0); - atomic_cmpset_int(&fwq->state, IQS_BUSY, IQS_IDLE); - } + t4_intr_evt(fwq); } -/* Deals with error interrupts */ +/* + * Interrupt handler for errors (installed directly when multiple interrupts are + * being used, or called by t4_intr_all). + */ void t4_intr_err(void *arg) { struct adapter *sc = arg; t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0); t4_slow_intr_handler(sc); } +/* + * Interrupt handler for iq-only queues. The firmware event queue is the only + * such queue right now. + */ void t4_intr_evt(void *arg) { struct sge_iq *iq = arg; if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { service_iq(iq, 0); atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); } } +/* + * Interrupt handler for iq+fl queues. + */ void t4_intr(void *arg) { struct sge_iq *iq = arg; if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_BUSY)) { - service_iq(iq, 0); + service_iq_fl(iq, 0); atomic_cmpset_int(&iq->state, IQS_BUSY, IQS_IDLE); } } +#ifdef DEV_NETMAP +/* + * Interrupt handler for netmap rx queues. 
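[Editor's note] Each of the interrupt handlers in this hunk guards its queue with an IDLE-to-BUSY compare-and-set so only one context services a queue at a time and latecomers return immediately instead of blocking; this also makes a spurious or duplicate interrupt harmless. A self-contained sketch of that claim/service/release pattern using C11 atomics (the driver uses atomic_cmpset_int on iq->state or nm_rxq->nm_state):

#include <stdatomic.h>
#include <stdbool.h>

enum { EX_IDLE, EX_BUSY };

/* Claim the queue, service it, release it; losers back off at once. */
static bool
ex_try_service(atomic_int *state, void (*service)(void *), void *arg)
{
	int expected = EX_IDLE;

	if (!atomic_compare_exchange_strong(state, &expected, EX_BUSY))
		return (false);		/* another context won the race */
	service(arg);
	atomic_store(state, EX_IDLE);
	return (true);
}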
+ */ void -t4_vi_intr(void *arg) +t4_nm_intr(void *arg) { - struct irq *irq = arg; + struct sge_nm_rxq *nm_rxq = arg; -#ifdef DEV_NETMAP - if (atomic_cmpset_int(&irq->nm_state, NM_ON, NM_BUSY)) { - t4_nm_intr(irq->nm_rxq); - atomic_cmpset_int(&irq->nm_state, NM_BUSY, NM_ON); + if (atomic_cmpset_int(&nm_rxq->nm_state, NM_ON, NM_BUSY)) { + service_nm_rxq(nm_rxq); + atomic_cmpset_int(&nm_rxq->nm_state, NM_BUSY, NM_ON); } -#endif - if (irq->rxq != NULL) - t4_intr(irq->rxq); } -static inline int -sort_before_lro(struct lro_ctrl *lro) +/* + * Interrupt handler for vectors shared between NIC and netmap rx queues. + */ +void +t4_vi_intr(void *arg) { + struct irq *irq = arg; - return (lro->lro_mbuf_max != 0); + MPASS(irq->nm_rxq != NULL); + t4_nm_intr(irq->nm_rxq); + + MPASS(irq->rxq != NULL); + t4_intr(irq->rxq); } +#endif /* - * Deals with anything and everything on the given ingress queue. + * Deals with interrupts on an iq-only (no freelist) queue. */ static int service_iq(struct sge_iq *iq, int budget) { struct sge_iq *q; - struct sge_rxq *rxq = iq_to_rxq(iq); /* Use iff iq is part of rxq */ - struct sge_fl *fl; /* Use iff IQ_HAS_FL */ struct adapter *sc = iq->adapter; struct iq_desc *d = &iq->desc[iq->cidx]; int ndescs = 0, limit; - int rsp_type, refill; + int rsp_type; uint32_t lq; - uint16_t fl_hw_cidx; - struct mbuf *m0; STAILQ_HEAD(, sge_iq) iql = STAILQ_HEAD_INITIALIZER(iql); -#if defined(INET) || defined(INET6) - const struct timeval lro_timeout = {0, sc->lro_timeout}; - struct lro_ctrl *lro = &rxq->lro; -#endif KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); + KASSERT((iq->flags & IQ_HAS_FL) == 0, + ("%s: called for iq %p with fl (iq->flags 0x%x)", __func__, iq, + iq->flags)); + MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); + MPASS((iq->flags & IQ_LRO_ENABLED) == 0); limit = budget ? budget : iq->qsize / 16; - if (iq->flags & IQ_HAS_FL) { - fl = &rxq->fl; - fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ - } else { - fl = NULL; - fl_hw_cidx = 0; /* to silence gcc warning */ - } - -#if defined(INET) || defined(INET6) - if (iq->flags & IQ_ADJ_CREDIT) { - MPASS(sort_before_lro(lro)); - iq->flags &= ~IQ_ADJ_CREDIT; - if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) { - tcp_lro_flush_all(lro); - t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) | - V_INGRESSQID((u32)iq->cntxt_id) | - V_SEINTARM(iq->intr_params)); - return (0); - } - ndescs = 1; - } -#else - MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); -#endif - /* * We always come back and check the descriptor ring for new indirect * interrupts and other responses after running a single handler. */ for (;;) { while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { rmb(); - refill = 0; - m0 = NULL; rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); lq = be32toh(d->rsp.pldbuflen_qid); switch (rsp_type) { case X_RSPD_TYPE_FLBUF: + panic("%s: data for an iq (%p) with no freelist", + __func__, iq); - KASSERT(iq->flags & IQ_HAS_FL, - ("%s: data for an iq (%p) with no freelist", - __func__, iq)); + /* NOTREACHED */ - m0 = get_fl_payload(sc, fl, lq); - if (__predict_false(m0 == NULL)) - goto process_iql; - refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2; -#ifdef T4_PKT_TIMESTAMP - /* - * 60 bit timestamp for the payload is - * *(uint64_t *)m0->m_pktdat. Note that it is - * in the leading free-space in the mbuf. The - * kernel can clobber it during a pullup, - * m_copymdata, etc. You need to make sure that - * the mbuf reaches you unmolested if you care - * about the timestamp. 
- */ - *(uint64_t *)m0->m_pktdat = - be64toh(ctrl->u.last_flit) & - 0xfffffffffffffff; -#endif - - /* fall through */ - case X_RSPD_TYPE_CPL: KASSERT(d->rss.opcode < NUM_CPL_CMDS, ("%s: bad opcode %02x.", __func__, d->rss.opcode)); - t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0); + t4_cpl_handler[d->rss.opcode](iq, &d->rss, NULL); break; case X_RSPD_TYPE_INTR: - /* - * Interrupts should be forwarded only to queues - * that are not forwarding their interrupts. - * This means service_iq can recurse but only 1 - * level deep. - */ - KASSERT(budget == 0, - ("%s: budget %u, rsp_type %u", __func__, - budget, rsp_type)); - - /* * There are 1K interrupt-capable queues (qids 0 * through 1023). A response type indicating a * forwarded interrupt with a qid >= 1K is an * iWARP async notification. */ - if (lq >= 1024) { - t4_an_handler(iq, &d->rsp); - break; - } + if (__predict_true(lq >= 1024)) { + t4_an_handler(iq, &d->rsp); + break; + } q = sc->sge.iqmap[lq - sc->sge.iq_start - sc->sge.iq_base]; if (atomic_cmpset_int(&q->state, IQS_IDLE, IQS_BUSY)) { - if (service_iq(q, q->qsize / 16) == 0) { + if (service_iq_fl(q, q->qsize / 16) == 0) { atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); } else { STAILQ_INSERT_TAIL(&iql, q, link); } } break; default: KASSERT(0, ("%s: illegal response type %d on iq %p", __func__, rsp_type, iq)); log(LOG_ERR, "%s: illegal response type %d on iq %p", device_get_nameunit(sc->dev), rsp_type, iq); break; } d++; if (__predict_false(++iq->cidx == iq->sidx)) { iq->cidx = 0; iq->gen ^= F_RSPD_GEN; d = &iq->desc[0]; } if (__predict_false(++ndescs == limit)) { t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); ndescs = 0; -#if defined(INET) || defined(INET6) - if (iq->flags & IQ_LRO_ENABLED && - !sort_before_lro(lro) && - sc->lro_timeout != 0) { - tcp_lro_flush_inactive(lro, - &lro_timeout); - } -#endif - if (budget) { - if (iq->flags & IQ_HAS_FL) { - FL_LOCK(fl); - refill_fl(sc, fl, 32); - FL_UNLOCK(fl); - } return (EINPROGRESS); } } - if (refill) { - FL_LOCK(fl); - refill_fl(sc, fl, 32); - FL_UNLOCK(fl); - fl_hw_cidx = fl->hw_cidx; - } } -process_iql: if (STAILQ_EMPTY(&iql)) break; /* * Process the head only, and send it to the back of the list if * it's still not done. */ q = STAILQ_FIRST(&iql); STAILQ_REMOVE_HEAD(&iql, link); - if (service_iq(q, q->qsize / 8) == 0) + if (service_iq_fl(q, q->qsize / 8) == 0) atomic_cmpset_int(&q->state, IQS_BUSY, IQS_IDLE); else STAILQ_INSERT_TAIL(&iql, q, link); } + t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | + V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); + + return (0); +} + +static inline int +sort_before_lro(struct lro_ctrl *lro) +{ + + return (lro->lro_mbuf_max != 0); +} + +/* + * Deals with interrupts on an iq+fl queue. + */ +static int +service_iq_fl(struct sge_iq *iq, int budget) +{ + struct sge_rxq *rxq = iq_to_rxq(iq); + struct sge_fl *fl; + struct adapter *sc = iq->adapter; + struct iq_desc *d = &iq->desc[iq->cidx]; + int ndescs = 0, limit; + int rsp_type, refill, starved; + uint32_t lq; + uint16_t fl_hw_cidx; + struct mbuf *m0; #if defined(INET) || defined(INET6) + const struct timeval lro_timeout = {0, sc->lro_timeout}; + struct lro_ctrl *lro = &rxq->lro; +#endif + + KASSERT(iq->state == IQS_BUSY, ("%s: iq %p not BUSY", __func__, iq)); + MPASS(iq->flags & IQ_HAS_FL); + + limit = budget ? 
budget : iq->qsize / 16; + fl = &rxq->fl; + fl_hw_cidx = fl->hw_cidx; /* stable snapshot */ + +#if defined(INET) || defined(INET6) + if (iq->flags & IQ_ADJ_CREDIT) { + MPASS(sort_before_lro(lro)); + iq->flags &= ~IQ_ADJ_CREDIT; + if ((d->rsp.u.type_gen & F_RSPD_GEN) != iq->gen) { + tcp_lro_flush_all(lro); + t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(1) | + V_INGRESSQID((u32)iq->cntxt_id) | + V_SEINTARM(iq->intr_params)); + return (0); + } + ndescs = 1; + } +#else + MPASS((iq->flags & IQ_ADJ_CREDIT) == 0); +#endif + + while ((d->rsp.u.type_gen & F_RSPD_GEN) == iq->gen) { + + rmb(); + + refill = 0; + m0 = NULL; + rsp_type = G_RSPD_TYPE(d->rsp.u.type_gen); + lq = be32toh(d->rsp.pldbuflen_qid); + + switch (rsp_type) { + case X_RSPD_TYPE_FLBUF: + + m0 = get_fl_payload(sc, fl, lq); + if (__predict_false(m0 == NULL)) + goto out; + refill = IDXDIFF(fl->hw_cidx, fl_hw_cidx, fl->sidx) > 2; +#ifdef T4_PKT_TIMESTAMP + /* + * 60 bit timestamp for the payload is + * *(uint64_t *)m0->m_pktdat. Note that it is + * in the leading free-space in the mbuf. The + * kernel can clobber it during a pullup, + * m_copymdata, etc. You need to make sure that + * the mbuf reaches you unmolested if you care + * about the timestamp. + */ + *(uint64_t *)m0->m_pktdat = + be64toh(ctrl->u.last_flit) & 0xfffffffffffffff; +#endif + + /* fall through */ + + case X_RSPD_TYPE_CPL: + KASSERT(d->rss.opcode < NUM_CPL_CMDS, + ("%s: bad opcode %02x.", __func__, d->rss.opcode)); + t4_cpl_handler[d->rss.opcode](iq, &d->rss, m0); + break; + + case X_RSPD_TYPE_INTR: + + /* + * There are 1K interrupt-capable queues (qids 0 + * through 1023). A response type indicating a + * forwarded interrupt with a qid >= 1K is an + * iWARP async notification. That is the only + * acceptable indirect interrupt on this queue. 
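[Editor's note] The qid test described in the comment above separates real indirect interrupts from iWARP async notifications purely by range: interrupt-capable ingress queues occupy qids 0 through 1023. A compact rendering of that dispatch decision; both handlers are hypothetical stand-ins for t4_an_handler() and queue servicing.

#include <stdint.h>
#include <stdio.h>

static void ex_async_notification(uint32_t lq) { printf("async %u\n", lq); }
static void ex_service_queue(uint32_t lq)      { printf("iq %u\n", lq); }

static void
ex_handle_indirect_intr(uint32_t lq)
{
	/* Interrupt-capable ingress queues occupy qids 0 through 1023. */
	if (lq >= 1024)
		ex_async_notification(lq);
	else
		ex_service_queue(lq);
}

int
main(void)
{
	ex_handle_indirect_intr(5);
	ex_handle_indirect_intr(2048);
	return (0);
}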
+ */ + if (__predict_false(lq < 1024)) { + panic("%s: indirect interrupt on iq_fl %p " + "with qid %u", __func__, iq, lq); + } + + t4_an_handler(iq, &d->rsp); + break; + + default: + KASSERT(0, ("%s: illegal response type %d on iq %p", + __func__, rsp_type, iq)); + log(LOG_ERR, "%s: illegal response type %d on iq %p", + device_get_nameunit(sc->dev), rsp_type, iq); + break; + } + + d++; + if (__predict_false(++iq->cidx == iq->sidx)) { + iq->cidx = 0; + iq->gen ^= F_RSPD_GEN; + d = &iq->desc[0]; + } + if (__predict_false(++ndescs == limit)) { + t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | + V_INGRESSQID(iq->cntxt_id) | + V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); + ndescs = 0; + +#if defined(INET) || defined(INET6) + if (iq->flags & IQ_LRO_ENABLED && + !sort_before_lro(lro) && + sc->lro_timeout != 0) { + tcp_lro_flush_inactive(lro, &lro_timeout); + } +#endif + if (budget) { + FL_LOCK(fl); + refill_fl(sc, fl, 32); + FL_UNLOCK(fl); + + return (EINPROGRESS); + } + } + if (refill) { + FL_LOCK(fl); + refill_fl(sc, fl, 32); + FL_UNLOCK(fl); + fl_hw_cidx = fl->hw_cidx; + } + } +out: +#if defined(INET) || defined(INET6) if (iq->flags & IQ_LRO_ENABLED) { if (ndescs > 0 && lro->lro_mbuf_count > 8) { MPASS(sort_before_lro(lro)); /* hold back one credit and don't flush LRO state */ iq->flags |= IQ_ADJ_CREDIT; ndescs--; } else { tcp_lro_flush_all(lro); } } #endif t4_write_reg(sc, sc->sge_gts_reg, V_CIDXINC(ndescs) | V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params)); - if (iq->flags & IQ_HAS_FL) { - int starved; - - FL_LOCK(fl); - starved = refill_fl(sc, fl, 64); - FL_UNLOCK(fl); - if (__predict_false(starved != 0)) - add_fl_to_sfl(sc, fl); - } + FL_LOCK(fl); + starved = refill_fl(sc, fl, 64); + FL_UNLOCK(fl); + if (__predict_false(starved != 0)) + add_fl_to_sfl(sc, fl); return (0); } static inline int cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll) { int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0; if (rc) MPASS(cll->region3 >= CL_METADATA_SIZE); return (rc); } static inline struct cluster_metadata * cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll, caddr_t cl) { if (cl_has_metadata(fl, cll)) { struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; return ((struct cluster_metadata *)(cl + swz->size) - 1); } return (NULL); } static void rxb_free(struct mbuf *m) { uma_zone_t zone = m->m_ext.ext_arg1; void *cl = m->m_ext.ext_arg2; uma_zfree(zone, cl); counter_u64_add(extfree_rels, 1); } /* * The mbuf returned by this function could be allocated from zone_mbuf or * constructed in spare room in the cluster. 
* * The mbuf carries the payload in one of these ways * a) frame inside the mbuf (mbuf from zone_mbuf) * b) m_cljset (for clusters without metadata) zone_mbuf * c) m_extaddref (cluster with metadata) inline mbuf * d) m_extaddref (cluster with metadata) zone_mbuf */ static struct mbuf * get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset, int remaining) { struct mbuf *m; struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; struct cluster_layout *cll = &sd->cll; struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx]; struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl); int len, blen; caddr_t payload; blen = hwb->size - fl->rx_offset; /* max possible in this buf */ len = min(remaining, blen); payload = sd->cl + cll->region1 + fl->rx_offset; if (fl->flags & FL_BUF_PACKING) { const u_int l = fr_offset + len; const u_int pad = roundup2(l, fl->buf_boundary) - l; if (fl->rx_offset + len + pad < hwb->size) blen = len + pad; MPASS(fl->rx_offset + blen <= hwb->size); } else { MPASS(fl->rx_offset == 0); /* not packing */ } if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) { /* * Copy payload into a freshly allocated mbuf. */ m = fr_offset == 0 ? m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); fl->mbuf_allocated++; #ifdef T4_PKT_TIMESTAMP /* Leave room for a timestamp */ m->m_data += 8; #endif /* copy data to mbuf */ bcopy(payload, mtod(m, caddr_t), len); } else if (sd->nmbuf * MSIZE < cll->region1) { /* * There's spare room in the cluster for an mbuf. Create one * and associate it with the payload that's in the cluster. */ MPASS(clm != NULL); m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE); /* No bzero required */ if (m_init(m, M_NOWAIT, MT_DATA, fr_offset == 0 ? M_PKTHDR | M_NOFREE : M_NOFREE)) return (NULL); fl->mbuf_inlined++; m_extaddref(m, payload, blen, &clm->refcount, rxb_free, swz->zone, sd->cl); if (sd->nmbuf++ == 0) counter_u64_add(extfree_refs, 1); } else { /* * Grab an mbuf from zone_mbuf and associate it with the * payload in the cluster. */ m = fr_offset == 0 ? 
m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); fl->mbuf_allocated++; if (clm != NULL) { m_extaddref(m, payload, blen, &clm->refcount, rxb_free, swz->zone, sd->cl); if (sd->nmbuf++ == 0) counter_u64_add(extfree_refs, 1); } else { m_cljset(m, sd->cl, swz->type); sd->cl = NULL; /* consumed, not a recycle candidate */ } } if (fr_offset == 0) m->m_pkthdr.len = remaining; m->m_len = len; if (fl->flags & FL_BUF_PACKING) { fl->rx_offset += blen; MPASS(fl->rx_offset <= hwb->size); if (fl->rx_offset < hwb->size) return (m); /* without advancing the cidx */ } if (__predict_false(++fl->cidx % 8 == 0)) { uint16_t cidx = fl->cidx / 8; if (__predict_false(cidx == fl->sidx)) fl->cidx = cidx = 0; fl->hw_cidx = cidx; } fl->rx_offset = 0; return (m); } static struct mbuf * get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf) { struct mbuf *m0, *m, **pnext; u_int remaining; const u_int total = G_RSPD_LEN(len_newbuf); if (__predict_false(fl->flags & FL_BUF_RESUME)) { M_ASSERTPKTHDR(fl->m0); MPASS(fl->m0->m_pkthdr.len == total); MPASS(fl->remaining < total); m0 = fl->m0; pnext = fl->pnext; remaining = fl->remaining; fl->flags &= ~FL_BUF_RESUME; goto get_segment; } if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) { fl->rx_offset = 0; if (__predict_false(++fl->cidx % 8 == 0)) { uint16_t cidx = fl->cidx / 8; if (__predict_false(cidx == fl->sidx)) fl->cidx = cidx = 0; fl->hw_cidx = cidx; } } /* * Payload starts at rx_offset in the current hw buffer. Its length is * 'len' and it may span multiple hw buffers. */ m0 = get_scatter_segment(sc, fl, 0, total); if (m0 == NULL) return (NULL); remaining = total - m0->m_len; pnext = &m0->m_next; while (remaining > 0) { get_segment: MPASS(fl->rx_offset == 0); m = get_scatter_segment(sc, fl, total - remaining, remaining); if (__predict_false(m == NULL)) { fl->m0 = m0; fl->pnext = pnext; fl->remaining = remaining; fl->flags |= FL_BUF_RESUME; return (NULL); } *pnext = m; pnext = &m->m_next; remaining -= m->m_len; } *pnext = NULL; M_ASSERTPKTHDR(m0); return (m0); } static int t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) { struct sge_rxq *rxq = iq_to_rxq(iq); struct ifnet *ifp = rxq->ifp; struct adapter *sc = iq->adapter; const struct cpl_rx_pkt *cpl = (const void *)(rss + 1); #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxq->lro; #endif static const int sw_hashtype[4][2] = { {M_HASHTYPE_NONE, M_HASHTYPE_NONE}, {M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6}, {M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6}, {M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6}, }; KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__, rss->opcode)); m0->m_pkthdr.len -= sc->params.sge.fl_pktshift; m0->m_len -= sc->params.sge.fl_pktshift; m0->m_data += sc->params.sge.fl_pktshift; m0->m_pkthdr.rcvif = ifp; M_HASHTYPE_SET(m0, sw_hashtype[rss->hash_type][rss->ipv6]); m0->m_pkthdr.flowid = be32toh(rss->hash_val); if (cpl->csum_calc && !(cpl->err_vec & sc->params.tp.err_vec_mask)) { if (ifp->if_capenable & IFCAP_RXCSUM && cpl->l2info & htobe32(F_RXF_IP)) { m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); rxq->rxcsum++; } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && cpl->l2info & htobe32(F_RXF_IP6)) { m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR); rxq->rxcsum++; } if (__predict_false(cpl->ip_frag)) m0->m_pkthdr.csum_data = be16toh(cpl->csum); else m0->m_pkthdr.csum_data = 0xffff; } if (cpl->vlan_ex) { 
m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan); m0->m_flags |= M_VLANTAG; rxq->vlan_extraction++; } #if defined(INET) || defined(INET6) if (iq->flags & IQ_LRO_ENABLED) { if (sort_before_lro(lro)) { tcp_lro_queue_mbuf(lro, m0); return (0); /* queued for sort, then LRO */ } if (tcp_lro_rx(lro, m0, 0) == 0) return (0); /* queued for LRO */ } #endif ifp->if_input(ifp, m0); return (0); } /* * Must drain the wrq or make sure that someone else will. */ static void wrq_tx_drain(void *arg, int n) { struct sge_wrq *wrq = arg; struct sge_eq *eq = &wrq->eq; EQ_LOCK(eq); if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) drain_wrq_wr_list(wrq->adapter, wrq); EQ_UNLOCK(eq); } static void drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq) { struct sge_eq *eq = &wrq->eq; u_int available, dbdiff; /* # of hardware descriptors */ u_int n; struct wrqe *wr; struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ EQ_LOCK_ASSERT_OWNED(eq); MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs)); wr = STAILQ_FIRST(&wrq->wr_list); MPASS(wr != NULL); /* Must be called with something useful to do */ MPASS(eq->pidx == eq->dbidx); dbdiff = 0; do { eq->cidx = read_hw_cidx(eq); if (eq->pidx == eq->cidx) available = eq->sidx - 1; else available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; MPASS(wr->wrq == wrq); n = howmany(wr->wr_len, EQ_ESIZE); if (available < n) break; dst = (void *)&eq->desc[eq->pidx]; if (__predict_true(eq->sidx - eq->pidx > n)) { /* Won't wrap, won't end exactly at the status page. */ bcopy(&wr->wr[0], dst, wr->wr_len); eq->pidx += n; } else { int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE; bcopy(&wr->wr[0], dst, first_portion); if (wr->wr_len > first_portion) { bcopy(&wr->wr[first_portion], &eq->desc[0], wr->wr_len - first_portion); } eq->pidx = n - (eq->sidx - eq->pidx); } wrq->tx_wrs_copied++; if (available < eq->sidx / 4 && atomic_cmpset_int(&eq->equiq, 0, 1)) { dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } dbdiff += n; if (dbdiff >= 16) { ring_eq_db(sc, eq, dbdiff); dbdiff = 0; } STAILQ_REMOVE_HEAD(&wrq->wr_list, link); free_wrqe(wr); MPASS(wrq->nwr_pending > 0); wrq->nwr_pending--; MPASS(wrq->ndesc_needed >= n); wrq->ndesc_needed -= n; } while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL); if (dbdiff) ring_eq_db(sc, eq, dbdiff); } /* * Doesn't fail. Holds on to work requests it can't send right away. */ void t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr) { #ifdef INVARIANTS struct sge_eq *eq = &wrq->eq; #endif EQ_LOCK_ASSERT_OWNED(eq); MPASS(wr != NULL); MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN); MPASS((wr->wr_len & 0x7) == 0); STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link); wrq->nwr_pending++; wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE); if (!TAILQ_EMPTY(&wrq->incomplete_wrs)) return; /* commit_wrq_wr will drain wr_list as well. */ drain_wrq_wr_list(sc, wrq); /* Doorbell must have caught up to the pidx. 
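	 * drain_wrq_wr_list rings the doorbell for everything it copies into
	 * the descriptor ring, so dbidx has kept up with pidx here.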
*/ MPASS(eq->pidx == eq->dbidx); } void t4_update_fl_bufsize(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->pi->adapter; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif struct sge_fl *fl; int i, maxp, mtu = ifp->if_mtu; maxp = mtu_to_max_payload(sc, mtu, 0); for_each_rxq(vi, i, rxq) { fl = &rxq->fl; FL_LOCK(fl); find_best_refill_source(sc, fl, maxp); FL_UNLOCK(fl); } #ifdef TCP_OFFLOAD maxp = mtu_to_max_payload(sc, mtu, 1); for_each_ofld_rxq(vi, i, ofld_rxq) { fl = &ofld_rxq->fl; FL_LOCK(fl); find_best_refill_source(sc, fl, maxp); FL_UNLOCK(fl); } #endif } static inline int mbuf_nsegs(struct mbuf *m) { M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.l5hlen > 0, ("%s: mbuf %p missing information on # of segments.", __func__, m)); return (m->m_pkthdr.l5hlen); } static inline void set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs) { M_ASSERTPKTHDR(m); m->m_pkthdr.l5hlen = nsegs; } static inline int mbuf_len16(struct mbuf *m) { int n; M_ASSERTPKTHDR(m); n = m->m_pkthdr.PH_loc.eight[0]; MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); return (n); } static inline void set_mbuf_len16(struct mbuf *m, uint8_t len16) { M_ASSERTPKTHDR(m); m->m_pkthdr.PH_loc.eight[0] = len16; } #ifdef RATELIMIT static inline int mbuf_eo_nsegs(struct mbuf *m) { M_ASSERTPKTHDR(m); return (m->m_pkthdr.PH_loc.eight[1]); } static inline void set_mbuf_eo_nsegs(struct mbuf *m, uint8_t nsegs) { M_ASSERTPKTHDR(m); m->m_pkthdr.PH_loc.eight[1] = nsegs; } static inline int mbuf_eo_len16(struct mbuf *m) { int n; M_ASSERTPKTHDR(m); n = m->m_pkthdr.PH_loc.eight[2]; MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); return (n); } static inline void set_mbuf_eo_len16(struct mbuf *m, uint8_t len16) { M_ASSERTPKTHDR(m); m->m_pkthdr.PH_loc.eight[2] = len16; } static inline int mbuf_eo_tsclk_tsoff(struct mbuf *m) { M_ASSERTPKTHDR(m); return (m->m_pkthdr.PH_loc.eight[3]); } static inline void set_mbuf_eo_tsclk_tsoff(struct mbuf *m, uint8_t tsclk_tsoff) { M_ASSERTPKTHDR(m); m->m_pkthdr.PH_loc.eight[3] = tsclk_tsoff; } static inline int needs_eo(struct mbuf *m) { return (m->m_pkthdr.snd_tag != NULL); } #endif static inline int needs_tso(struct mbuf *m) { M_ASSERTPKTHDR(m); return (m->m_pkthdr.csum_flags & CSUM_TSO); } static inline int needs_l3_csum(struct mbuf *m) { M_ASSERTPKTHDR(m); return (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)); } static inline int needs_l4_csum(struct mbuf *m) { M_ASSERTPKTHDR(m); return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)); } static inline int needs_tcp_csum(struct mbuf *m) { M_ASSERTPKTHDR(m); return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TCP_IPV6 | CSUM_TSO)); } #ifdef RATELIMIT static inline int needs_udp_csum(struct mbuf *m) { M_ASSERTPKTHDR(m); return (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)); } #endif static inline int needs_vlan_insertion(struct mbuf *m) { M_ASSERTPKTHDR(m); return (m->m_flags & M_VLANTAG); } static void * m_advance(struct mbuf **pm, int *poffset, int len) { struct mbuf *m = *pm; int offset = *poffset; uintptr_t p = 0; MPASS(len > 0); for (;;) { if (offset + len < m->m_len) { offset += len; p = mtod(m, uintptr_t) + offset; break; } len -= m->m_len - offset; m = m->m_next; offset = 0; MPASS(m != NULL); } *poffset = offset; *pm = m; return ((void *)p); } /* * Can deal with empty mbufs in the chain that have m_len = 0, but the chain * must have at least one mbuf that's not empty. It is possible for this * routine to return 0 if skip accounts for all the contents of the mbuf chain. 
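 * (parse_pkt passes a non-zero skip for EO work requests, whose headers are
 * placed in the work request itself instead of the gather list.)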
*/ static inline int count_mbuf_nsegs(struct mbuf *m, int skip) { vm_paddr_t lastb, next; vm_offset_t va; int len, nsegs; M_ASSERTPKTHDR(m); MPASS(m->m_pkthdr.len > 0); MPASS(m->m_pkthdr.len >= skip); nsegs = 0; lastb = 0; for (; m; m = m->m_next) { len = m->m_len; if (__predict_false(len == 0)) continue; if (skip >= len) { skip -= len; continue; } va = mtod(m, vm_offset_t) + skip; len -= skip; skip = 0; next = pmap_kextract(va); nsegs += sglist_count((void *)(uintptr_t)va, len); if (lastb + 1 == next) nsegs--; lastb = pmap_kextract(va + len - 1); } return (nsegs); } /* * Analyze the mbuf to determine its tx needs. The mbuf passed in may change: * a) caller can assume it's been freed if this function returns with an error. * b) it may get defragged up if the gather list is too long for the hardware. */ int parse_pkt(struct adapter *sc, struct mbuf **mp) { struct mbuf *m0 = *mp, *m; int rc, nsegs, defragged = 0, offset; struct ether_header *eh; void *l3hdr; #if defined(INET) || defined(INET6) struct tcphdr *tcp; #endif uint16_t eh_type; M_ASSERTPKTHDR(m0); if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { rc = EINVAL; fail: m_freem(m0); *mp = NULL; return (rc); } restart: /* * First count the number of gather list segments in the payload. * Defrag the mbuf if nsegs exceeds the hardware limit. */ M_ASSERTPKTHDR(m0); MPASS(m0->m_pkthdr.len > 0); nsegs = count_mbuf_nsegs(m0, 0); if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) { if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) { rc = EFBIG; goto fail; } *mp = m0 = m; /* update caller's copy after defrag */ goto restart; } if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN)) { m0 = m_pullup(m0, m0->m_pkthdr.len); if (m0 == NULL) { /* Should have left well enough alone. */ rc = EFBIG; goto fail; } *mp = m0; /* update caller's copy after pullup */ goto restart; } set_mbuf_nsegs(m0, nsegs); if (sc->flags & IS_VF) set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0))); else set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); #ifdef RATELIMIT /* * Ethofld is limited to TCP and UDP for now, and only when L4 hw * checksumming is enabled. needs_l4_csum happens to check for all the * right things. */ if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0))) m0->m_pkthdr.snd_tag = NULL; #endif if (!needs_tso(m0) && #ifdef RATELIMIT !needs_eo(m0) && #endif !(sc->flags & IS_VF && (needs_l3_csum(m0) || needs_l4_csum(m0)))) return (0); m = m0; eh = mtod(m, struct ether_header *); eh_type = ntohs(eh->ether_type); if (eh_type == ETHERTYPE_VLAN) { struct ether_vlan_header *evh = (void *)eh; eh_type = ntohs(evh->evl_proto); m0->m_pkthdr.l2hlen = sizeof(*evh); } else m0->m_pkthdr.l2hlen = sizeof(*eh); offset = 0; l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen); switch (eh_type) { #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = l3hdr; MPASS(!needs_tso(m0) || ip6->ip6_nxt == IPPROTO_TCP); m0->m_pkthdr.l3hlen = sizeof(*ip6); break; } #endif #ifdef INET case ETHERTYPE_IP: { struct ip *ip = l3hdr; m0->m_pkthdr.l3hlen = ip->ip_hl * 4; break; } #endif default: panic("%s: ethertype 0x%04x unknown. 
if_cxgbe must be compiled" " with the same INET/INET6 options as the kernel.", __func__, eh_type); } #if defined(INET) || defined(INET6) if (needs_tcp_csum(m0)) { tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); m0->m_pkthdr.l4hlen = tcp->th_off * 4; #ifdef RATELIMIT if (tsclk >= 0 && *(uint32_t *)(tcp + 1) == ntohl(0x0101080a)) { set_mbuf_eo_tsclk_tsoff(m0, V_FW_ETH_TX_EO_WR_TSCLK(tsclk) | V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1)); } else set_mbuf_eo_tsclk_tsoff(m0, 0); } else if (needs_udp_csum(m)) { m0->m_pkthdr.l4hlen = sizeof(struct udphdr); #endif } #ifdef RATELIMIT if (needs_eo(m0)) { u_int immhdrs; /* EO WRs have the headers in the WR and not the GL. */ immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen; nsegs = count_mbuf_nsegs(m0, immhdrs); set_mbuf_eo_nsegs(m0, nsegs); set_mbuf_eo_len16(m0, txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0))); } #endif #endif MPASS(m0 == *mp); return (0); } void * start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie) { struct sge_eq *eq = &wrq->eq; struct adapter *sc = wrq->adapter; int ndesc, available; struct wrqe *wr; void *w; MPASS(len16 > 0); ndesc = howmany(len16, EQ_ESIZE / 16); MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC); EQ_LOCK(eq); if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) drain_wrq_wr_list(sc, wrq); if (!STAILQ_EMPTY(&wrq->wr_list)) { slowpath: EQ_UNLOCK(eq); wr = alloc_wrqe(len16 * 16, wrq); if (__predict_false(wr == NULL)) return (NULL); cookie->pidx = -1; cookie->ndesc = ndesc; return (&wr->wr); } eq->cidx = read_hw_cidx(eq); if (eq->pidx == eq->cidx) available = eq->sidx - 1; else available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; if (available < ndesc) goto slowpath; cookie->pidx = eq->pidx; cookie->ndesc = ndesc; TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link); w = &eq->desc[eq->pidx]; IDXINCR(eq->pidx, ndesc, eq->sidx); if (__predict_false(cookie->pidx + ndesc > eq->sidx)) { w = &wrq->ss[0]; wrq->ss_pidx = cookie->pidx; wrq->ss_len = len16 * 16; } EQ_UNLOCK(eq); return (w); } void commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie) { struct sge_eq *eq = &wrq->eq; struct adapter *sc = wrq->adapter; int ndesc, pidx; struct wrq_cookie *prev, *next; if (cookie->pidx == -1) { struct wrqe *wr = __containerof(w, struct wrqe, wr); t4_wrq_tx(sc, wr); return; } if (__predict_false(w == &wrq->ss[0])) { int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE; MPASS(wrq->ss_len > n); /* WR had better wrap around. */ bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n); bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n); wrq->tx_wrs_ss++; } else wrq->tx_wrs_direct++; EQ_LOCK(eq); ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */ pidx = cookie->pidx; MPASS(pidx >= 0 && pidx < eq->sidx); prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link); next = TAILQ_NEXT(cookie, link); if (prev == NULL) { MPASS(pidx == eq->dbidx); if (next == NULL || ndesc >= 16) { int available; struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */ /* * Note that the WR via which we'll request tx updates * is at pidx and not eq->pidx, which has moved on * already. 
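			 * (start_wrq_wr advanced eq->pidx when it handed out
			 * this slot.)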
*/ dst = (void *)&eq->desc[pidx]; available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; if (available < eq->sidx / 4 && atomic_cmpset_int(&eq->equiq, 0, 1)) { dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | F_FW_WR_EQUEQ); eq->equeqidx = pidx; } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); eq->equeqidx = pidx; } ring_eq_db(wrq->adapter, eq, ndesc); } else { MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc); next->pidx = pidx; next->ndesc += ndesc; } } else { MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc); prev->ndesc += ndesc; } TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link); if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list)) drain_wrq_wr_list(sc, wrq); #ifdef INVARIANTS if (TAILQ_EMPTY(&wrq->incomplete_wrs)) { /* Doorbell must have caught up to the pidx. */ MPASS(wrq->eq.pidx == wrq->eq.dbidx); } #endif EQ_UNLOCK(eq); } static u_int can_resume_eth_tx(struct mp_ring *r) { struct sge_eq *eq = r->cookie; return (total_available_tx_desc(eq) > eq->sidx / 8); } static inline int cannot_use_txpkts(struct mbuf *m) { /* maybe put a GL limit too, to avoid silliness? */ return (needs_tso(m)); } static inline int discard_tx(struct sge_eq *eq) { return ((eq->flags & (EQ_ENABLED | EQ_QFLUSH)) != EQ_ENABLED); } /* * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to * be consumed. Return the actual number consumed. 0 indicates a stall. */ static u_int eth_tx(struct mp_ring *r, u_int cidx, u_int pidx) { struct sge_txq *txq = r->cookie; struct sge_eq *eq = &txq->eq; struct ifnet *ifp = txq->ifp; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; u_int total, remaining; /* # of packets */ u_int available, dbdiff; /* # of hardware descriptors */ u_int n, next_cidx; struct mbuf *m0, *tail; struct txpkts txp; struct fw_eth_tx_pkts_wr *wr; /* any fw WR struct will do */ remaining = IDXDIFF(pidx, cidx, r->size); MPASS(remaining > 0); /* Must not be called without work to do. */ total = 0; TXQ_LOCK(txq); if (__predict_false(discard_tx(eq))) { while (cidx != pidx) { m0 = r->items[cidx]; m_freem(m0); if (++cidx == r->size) cidx = 0; } reclaim_tx_descs(txq, 2048); total = remaining; goto done; } /* How many hardware descriptors do we have readily available. */ if (eq->pidx == eq->cidx) available = eq->sidx - 1; else available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1; dbdiff = IDXDIFF(eq->pidx, eq->dbidx, eq->sidx); while (remaining > 0) { m0 = r->items[cidx]; M_ASSERTPKTHDR(m0); MPASS(m0->m_nextpkt == NULL); if (available < SGE_MAX_WR_NDESC) { available += reclaim_tx_descs(txq, 64); if (available < howmany(mbuf_len16(m0), EQ_ESIZE / 16)) break; /* out of descriptors */ } next_cidx = cidx + 1; if (__predict_false(next_cidx == r->size)) next_cidx = 0; wr = (void *)&eq->desc[eq->pidx]; if (sc->flags & IS_VF) { total++; remaining--; ETHER_BPF_MTAP(ifp, m0); n = write_txpkt_vm_wr(sc, txq, (void *)wr, m0, available); } else if (remaining > 1 && try_txpkts(m0, r->items[next_cidx], &txp, available) == 0) { /* pkts at cidx, next_cidx should both be in txp. 
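			 * try_txpkts accepted both frames, so they will share
			 * a single txpkts work request.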
*/ MPASS(txp.npkt == 2); tail = r->items[next_cidx]; MPASS(tail->m_nextpkt == NULL); ETHER_BPF_MTAP(ifp, m0); ETHER_BPF_MTAP(ifp, tail); m0->m_nextpkt = tail; if (__predict_false(++next_cidx == r->size)) next_cidx = 0; while (next_cidx != pidx) { if (add_to_txpkts(r->items[next_cidx], &txp, available) != 0) break; tail->m_nextpkt = r->items[next_cidx]; tail = tail->m_nextpkt; ETHER_BPF_MTAP(ifp, tail); if (__predict_false(++next_cidx == r->size)) next_cidx = 0; } n = write_txpkts_wr(txq, wr, m0, &txp, available); total += txp.npkt; remaining -= txp.npkt; } else { total++; remaining--; ETHER_BPF_MTAP(ifp, m0); n = write_txpkt_wr(txq, (void *)wr, m0, available); } MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC); available -= n; dbdiff += n; IDXINCR(eq->pidx, n, eq->sidx); if (total_available_tx_desc(eq) < eq->sidx / 4 && atomic_cmpset_int(&eq->equiq, 0, 1)) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ | F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) { wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ); eq->equeqidx = eq->pidx; } if (dbdiff >= 16 && remaining >= 4) { ring_eq_db(sc, eq, dbdiff); available += reclaim_tx_descs(txq, 4 * dbdiff); dbdiff = 0; } cidx = next_cidx; } if (dbdiff != 0) { ring_eq_db(sc, eq, dbdiff); reclaim_tx_descs(txq, 32); } done: TXQ_UNLOCK(txq); return (total); } static inline void init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx, int qsize) { KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS, ("%s: bad tmr_idx %d", __func__, tmr_idx)); KASSERT(pktc_idx < SGE_NCOUNTERS, /* -ve is ok, means don't use */ ("%s: bad pktc_idx %d", __func__, pktc_idx)); iq->flags = 0; iq->adapter = sc; iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx); iq->intr_pktc_idx = SGE_NCOUNTERS - 1; if (pktc_idx >= 0) { iq->intr_params |= F_QINTR_CNT_EN; iq->intr_pktc_idx = pktc_idx; } iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */ iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE; } static inline void init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name) { fl->qsize = qsize; fl->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; strlcpy(fl->lockname, name, sizeof(fl->lockname)); if (sc->flags & BUF_PACKING_OK && ((!is_t4(sc) && buffer_packing) || /* T5+: enabled unless 0 */ (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */ fl->flags |= FL_BUF_PACKING; find_best_refill_source(sc, fl, maxp); find_safe_refill_source(sc, fl); } static inline void init_eq(struct adapter *sc, struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan, uint16_t iqid, char *name) { KASSERT(eqtype <= EQ_TYPEMASK, ("%s: bad qtype %d", __func__, eqtype)); eq->flags = eqtype & EQ_TYPEMASK; eq->tx_chan = tx_chan; eq->iqid = iqid; eq->sidx = qsize - sc->params.sge.spg_len / EQ_ESIZE; strlcpy(eq->lockname, name, sizeof(eq->lockname)); } static int alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag, bus_dmamap_t *map, bus_addr_t *pa, void **va) { int rc; rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag); if (rc != 0) { device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc); goto done; } rc = bus_dmamem_alloc(*tag, va, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map); if (rc != 0) { device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc); goto done; } rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0); if (rc != 0) { device_printf(sc->dev, "cannot load DMA map: %d\n", rc); goto 
done; } done: if (rc) free_ring(sc, *tag, *map, *pa, *va); return (rc); } static int free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map, bus_addr_t pa, void *va) { if (pa) bus_dmamap_unload(tag, map); if (va) bus_dmamem_free(tag, va, map); if (tag) bus_dma_tag_destroy(tag); return (0); } /* * Allocates the ring for an ingress queue and an optional freelist. If the * freelist is specified it will be allocated and then associated with the * ingress queue. * * Returns errno on failure. Resources allocated up to that point may still be * allocated. Caller is responsible for cleanup in case this function fails. * * If the ingress queue will take interrupts directly then the intr_idx * specifies the vector, starting from 0. -1 means the interrupts for this * queue should be forwarded to the fwq. */ static int alloc_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl, int intr_idx, int cong) { int rc, i, cntxt_id; size_t len; struct fw_iq_cmd c; struct port_info *pi = vi->pi; struct adapter *sc = iq->adapter; struct sge_params *sp = &sc->params.sge; __be32 v = 0; len = iq->qsize * IQ_ESIZE; rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba, (void **)&iq->desc); if (rc != 0) return (rc); bzero(&c, sizeof(c)); c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) | V_FW_IQ_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART | FW_LEN16(c)); /* Special handling for firmware event queue */ if (iq == &sc->sge.fwq) v |= F_FW_IQ_CMD_IQASYNCH; if (intr_idx < 0) { /* Forwarded interrupts, all headed to fwq */ v |= F_FW_IQ_CMD_IQANDST; v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.fwq.cntxt_id); } else { KASSERT(intr_idx < sc->intr_count, ("%s: invalid direct intr_idx %d", __func__, intr_idx)); v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx); } c.type_to_iqandstindex = htobe32(v | V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) | V_FW_IQ_CMD_VIID(vi->viid) | V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT)); c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) | F_FW_IQ_CMD_IQGTSMODE | V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) | V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); c.iqsize = htobe16(iq->qsize); c.iqaddr = htobe64(iq->ba); if (cong >= 0) c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN); if (fl) { mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); len = fl->qsize * EQ_ESIZE; rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, &fl->ba, (void **)&fl->desc); if (rc) return (rc); /* Allocate space for one software descriptor per buffer. */ rc = alloc_fl_sdesc(fl); if (rc != 0) { device_printf(sc->dev, "failed to setup fl software descriptors: %d\n", rc); return (rc); } if (fl->flags & FL_BUF_PACKING) { fl->lowat = roundup2(sp->fl_starve_threshold2, 8); fl->buf_boundary = sp->pack_boundary; } else { fl->lowat = roundup2(sp->fl_starve_threshold, 8); fl->buf_boundary = 16; } if (fl_pad && fl->buf_boundary < sp->pad_boundary) fl->buf_boundary = sp->pad_boundary; c.iqns_to_fl0congen |= htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN : 0)); if (cong >= 0) { c.iqns_to_fl0congen |= htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF | F_FW_IQ_CMD_FL0CONGEN); } c.fl0dcaen_to_fl0cidxfthresh = htobe16(V_FW_IQ_CMD_FL0FBMIN(chip_id(sc) <= CHELSIO_T5 ? 
X_FETCHBURSTMIN_128B : X_FETCHBURSTMIN_64B) | V_FW_IQ_CMD_FL0FBMAX(chip_id(sc) <= CHELSIO_T5 ? X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); c.fl0size = htobe16(fl->qsize); c.fl0addr = htobe64(fl->ba); } rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); if (rc != 0) { device_printf(sc->dev, "failed to create ingress queue: %d\n", rc); return (rc); } iq->cidx = 0; iq->gen = F_RSPD_GEN; iq->intr_next = iq->intr_params; iq->cntxt_id = be16toh(c.iqid); iq->abs_id = be16toh(c.physiqid); iq->flags |= IQ_ALLOCATED; cntxt_id = iq->cntxt_id - sc->sge.iq_start; if (cntxt_id >= sc->sge.niq) { panic ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.niq - 1); } sc->sge.iqmap[cntxt_id] = iq; if (fl) { u_int qid; iq->flags |= IQ_HAS_FL; fl->cntxt_id = be16toh(c.fl0id); fl->pidx = fl->cidx = 0; cntxt_id = fl->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) { panic("%s: fl->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); } sc->sge.eqmap[cntxt_id] = (void *)fl; qid = fl->cntxt_id; if (isset(&sc->doorbells, DOORBELL_UDB)) { uint32_t s_qpp = sc->params.sge.eq_s_qpp; uint32_t mask = (1 << s_qpp) - 1; volatile uint8_t *udb; udb = sc->udbs_base + UDBS_DB_OFFSET; udb += (qid >> s_qpp) << PAGE_SHIFT; qid &= mask; if (qid < PAGE_SIZE / UDBS_SEG_SIZE) { udb += qid << UDBS_SEG_SHIFT; qid = 0; } fl->udb = (volatile void *)udb; } fl->dbval = V_QID(qid) | sc->chip_params->sge_fl_db; FL_LOCK(fl); /* Enough to make sure the SGE doesn't think it's starved */ refill_fl(sc, fl, fl->lowat); FL_UNLOCK(fl); } if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && cong >= 0) { uint32_t param, val; param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | V_FW_PARAMS_PARAM_YZ(iq->cntxt_id); if (cong == 0) val = 1 << 19; else { val = 2 << 19; for (i = 0; i < 4; i++) { if (cong & (1 << i)) val |= 1 << (i << 2); } } rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc != 0) { /* report error but carry on */ device_printf(sc->dev, "failed to set congestion manager context for " "ingress queue %d: %d\n", iq->cntxt_id, rc); } } /* Enable IQ interrupts */ atomic_store_rel_int(&iq->state, IQS_IDLE); t4_write_reg(sc, sc->sge_gts_reg, V_SEINTARM(iq->intr_params) | V_INGRESSQID(iq->cntxt_id)); return (0); } static int free_iq_fl(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl) { int rc; struct adapter *sc = iq->adapter; device_t dev; if (sc == NULL) return (0); /* nothing to do */ dev = vi ? vi->dev : sc->dev; if (iq->flags & IQ_ALLOCATED) { rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id, fl ? 
fl->cntxt_id : 0xffff, 0xffff); if (rc != 0) { device_printf(dev, "failed to free queue %p: %d\n", iq, rc); return (rc); } iq->flags &= ~IQ_ALLOCATED; } free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc); bzero(iq, sizeof(*iq)); if (fl) { free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba, fl->desc); if (fl->sdesc) free_fl_sdesc(sc, fl); if (mtx_initialized(&fl->fl_lock)) mtx_destroy(&fl->fl_lock); bzero(fl, sizeof(*fl)); } return (0); } static void add_iq_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, struct sge_iq *iq) { struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &iq->ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, iq->qsize * IQ_ESIZE, "descriptor ring size in bytes"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", CTLTYPE_INT | CTLFLAG_RD, &iq->abs_id, 0, sysctl_uint16, "I", "absolute id of the queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &iq->cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &iq->cidx, 0, sysctl_uint16, "I", "consumer index"); } static void add_fl_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, struct sge_fl *fl) { struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, "freelist"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &fl->ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, fl->sidx * EQ_ESIZE + sc->params.sge.spg_len, "desc ring size in bytes"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I", "SGE context id of the freelist"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL, fl_pad ? 1 : 0, "padding enabled"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL, fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, 0, "consumer index"); if (fl->flags & FL_BUF_PACKING) { SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); } SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, 0, "producer index"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated", CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined", CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); } static int alloc_fwq(struct adapter *sc) { int rc, intr_idx; struct sge_iq *fwq = &sc->sge.fwq; struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); if (sc->flags & IS_VF) intr_idx = 0; else intr_idx = sc->intr_count > 1 ? 
1 : 0; rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1); if (rc != 0) { device_printf(sc->dev, "failed to create firmware event queue: %d\n", rc); return (rc); } oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "fwq", CTLFLAG_RD, NULL, "firmware event queue"); add_iq_sysctls(&sc->ctx, oid, fwq); return (0); } static int free_fwq(struct adapter *sc) { return free_iq_fl(NULL, &sc->sge.fwq, NULL); } static int alloc_mgmtq(struct adapter *sc) { int rc; struct sge_wrq *mgmtq = &sc->sge.mgmtq; char name[16]; struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, "mgmtq", CTLFLAG_RD, NULL, "management queue"); snprintf(name, sizeof(name), "%s mgmtq", device_get_nameunit(sc->dev)); init_eq(sc, &mgmtq->eq, EQ_CTRL, CTRL_EQ_QSIZE, sc->port[0]->tx_chan, sc->sge.fwq.cntxt_id, name); rc = alloc_wrq(sc, NULL, mgmtq, oid); if (rc != 0) { device_printf(sc->dev, "failed to create management queue: %d\n", rc); return (rc); } return (0); } static int free_mgmtq(struct adapter *sc) { return free_wrq(sc, &sc->sge.mgmtq); } int tnl_cong(struct port_info *pi, int drop) { if (drop == -1) return (-1); else if (drop == 1) return (0); else return (pi->rx_e_chan_map); } static int alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int intr_idx, int idx, struct sysctl_oid *oid) { int rc; struct adapter *sc = vi->pi->adapter; struct sysctl_oid_list *children; char name[16]; rc = alloc_iq_fl(vi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(vi->pi, cong_drop)); if (rc != 0) return (rc); if (idx == 0) sc->sge.iq_base = rxq->iq.abs_id - rxq->iq.cntxt_id; else KASSERT(rxq->iq.cntxt_id + sc->sge.iq_base == rxq->iq.abs_id, ("iq_base mismatch")); KASSERT(sc->sge.iq_base == 0 || sc->flags & IS_VF, ("PF with non-zero iq_base")); /* * The freelist is just barely above the starvation threshold right now, * fill it up a bit more. 
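	 * (alloc_iq_fl added only fl->lowat buffers.)
	 */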
*/ FL_LOCK(&rxq->fl); refill_fl(sc, &rxq->fl, 128); FL_UNLOCK(&rxq->fl); #if defined(INET) || defined(INET6) rc = tcp_lro_init_args(&rxq->lro, vi->ifp, lro_entries, lro_mbufs); if (rc != 0) return (rc); MPASS(rxq->lro.ifp == vi->ifp); /* also indicates LRO init'ed */ if (vi->ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; #endif rxq->ifp = vi->ifp; children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); children = SYSCTL_CHILDREN(oid); add_iq_sysctls(&vi->ctx, oid, &rxq->iq); #if defined(INET) || defined(INET6) SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD, &rxq->lro.lro_queued, 0, NULL); SYSCTL_ADD_U64(&vi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD, &rxq->lro.lro_flushed, 0, NULL); #endif SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD, &rxq->rxcsum, "# of times hardware assisted with checksum"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction", CTLFLAG_RD, &rxq->vlan_extraction, "# of times hardware extracted 802.1Q tag"); add_fl_sysctls(sc, &vi->ctx, oid, &rxq->fl); return (rc); } static int free_rxq(struct vi_info *vi, struct sge_rxq *rxq) { int rc; #if defined(INET) || defined(INET6) if (rxq->lro.ifp) { tcp_lro_free(&rxq->lro); rxq->lro.ifp = NULL; } #endif rc = free_iq_fl(vi, &rxq->iq, &rxq->fl); if (rc == 0) bzero(rxq, sizeof(*rxq)); return (rc); } #ifdef TCP_OFFLOAD static int alloc_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq, int intr_idx, int idx, struct sysctl_oid *oid) { struct port_info *pi = vi->pi; int rc; struct sysctl_oid_list *children; char name[16]; rc = alloc_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl, intr_idx, 0); if (rc != 0) return (rc); children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); add_iq_sysctls(&vi->ctx, oid, &ofld_rxq->iq); add_fl_sysctls(pi->adapter, &vi->ctx, oid, &ofld_rxq->fl); return (rc); } static int free_ofld_rxq(struct vi_info *vi, struct sge_ofld_rxq *ofld_rxq) { int rc; rc = free_iq_fl(vi, &ofld_rxq->iq, &ofld_rxq->fl); if (rc == 0) bzero(ofld_rxq, sizeof(*ofld_rxq)); return (rc); } #endif #ifdef DEV_NETMAP static int alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx, int idx, struct sysctl_oid *oid) { int rc; struct sysctl_oid_list *children; struct sysctl_ctx_list *ctx; char name[16]; size_t len; struct adapter *sc = vi->pi->adapter; struct netmap_adapter *na = NA(vi->ifp); MPASS(na != NULL); len = vi->qsize_rxq * IQ_ESIZE; rc = alloc_ring(sc, len, &nm_rxq->iq_desc_tag, &nm_rxq->iq_desc_map, &nm_rxq->iq_ba, (void **)&nm_rxq->iq_desc); if (rc != 0) return (rc); len = na->num_rx_desc * EQ_ESIZE + sc->params.sge.spg_len; rc = alloc_ring(sc, len, &nm_rxq->fl_desc_tag, &nm_rxq->fl_desc_map, &nm_rxq->fl_ba, (void **)&nm_rxq->fl_desc); if (rc != 0) return (rc); nm_rxq->vi = vi; nm_rxq->nid = idx; nm_rxq->iq_cidx = 0; nm_rxq->iq_sidx = vi->qsize_rxq - sc->params.sge.spg_len / IQ_ESIZE; nm_rxq->iq_gen = F_RSPD_GEN; nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0; nm_rxq->fl_sidx = na->num_rx_desc; nm_rxq->intr_idx = intr_idx; nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID; ctx = &vi->ctx; children = SYSCTL_CHILDREN(oid); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "rx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "abs_id", 
	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_abs_id, 0, sysctl_uint16,
	    "I", "absolute id of the queue");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cntxt_id, 0, sysctl_uint16,
	    "I", "SGE context id of the queue");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx",
	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->iq_cidx, 0, sysctl_uint16, "I",
	    "consumer index");

	children = SYSCTL_CHILDREN(oid);
	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL,
	    "freelist");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
	    CTLTYPE_INT | CTLFLAG_RD, &nm_rxq->fl_cntxt_id, 0, sysctl_uint16,
	    "I", "SGE context id of the freelist");
	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
	    &nm_rxq->fl_cidx, 0, "consumer index");
	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
	    &nm_rxq->fl_pidx, 0, "producer index");

	return (rc);
}

static int
free_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq)
{
	struct adapter *sc = vi->pi->adapter;

	if (vi->flags & VI_INIT_DONE)
		MPASS(nm_rxq->iq_cntxt_id == INVALID_NM_RXQ_CNTXT_ID);
	else
		MPASS(nm_rxq->iq_cntxt_id == 0);

	free_ring(sc, nm_rxq->iq_desc_tag, nm_rxq->iq_desc_map, nm_rxq->iq_ba,
	    nm_rxq->iq_desc);
	free_ring(sc, nm_rxq->fl_desc_tag, nm_rxq->fl_desc_map, nm_rxq->fl_ba,
	    nm_rxq->fl_desc);

	return (0);
}

static int
alloc_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq, int iqidx, int idx,
    struct sysctl_oid *oid)
{
	int rc;
	size_t len;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct netmap_adapter *na = NA(vi->ifp);
	char name[16];
	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);

	len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len;
	rc = alloc_ring(sc, len, &nm_txq->desc_tag, &nm_txq->desc_map,
	    &nm_txq->ba, (void **)&nm_txq->desc);
	if (rc)
		return (rc);

	nm_txq->pidx = nm_txq->cidx = 0;
	nm_txq->sidx = na->num_tx_desc;
	nm_txq->nid = idx;
	nm_txq->iqidx = iqidx;
	nm_txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) |
	    V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) |
	    V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid)));
	nm_txq->cntxt_id = INVALID_NM_TXQ_CNTXT_ID;

	snprintf(name, sizeof(name), "%d", idx);
	oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
	    NULL, "netmap tx queue");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
	    &nm_txq->cntxt_id, 0, "SGE context id of the queue");
	SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx",
	    CTLTYPE_INT | CTLFLAG_RD, &nm_txq->cidx, 0, sysctl_uint16, "I",
	    "consumer index");
	SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx",
	    CTLTYPE_INT | CTLFLAG_RD, &nm_txq->pidx, 0, sysctl_uint16, "I",
	    "producer index");

	return (rc);
}

static int
free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq)
{
	struct adapter *sc = vi->pi->adapter;

	if (vi->flags & VI_INIT_DONE)
		MPASS(nm_txq->cntxt_id == INVALID_NM_TXQ_CNTXT_ID);
	else
		MPASS(nm_txq->cntxt_id == 0);

	free_ring(sc, nm_txq->desc_tag, nm_txq->desc_map, nm_txq->ba,
	    nm_txq->desc);
	return (0);
}
#endif

static int
ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
{
	int rc, cntxt_id;
	struct fw_eq_ctrl_cmd c;
	int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE;

	bzero(&c, sizeof(c));

	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
	    V_FW_EQ_CTRL_CMD_VFN(0));
	c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
	    F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
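	/*
	 * The rest of the command: completion iq binding, fetch and burst
	 * parameters, the queue size (in hw descriptors, status page
	 * included), and the bus address of the ring.
	 */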
	c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid));
	c.physeqid_pkd = htobe32(0);
	c.fetchszm_to_iqid = htobe32(
	    V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
	    V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_CTRL_CMD_FETCHRO |
	    V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize = htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
	    V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
	    V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
	    V_FW_EQ_CTRL_CMD_EQSIZE(qsize));
	c.eqaddr = htobe64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to create control queue %d: %d\n", eq->tx_chan, rc);
		return (rc);
	}
	eq->flags |= EQ_ALLOCATED;

	eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	if (cntxt_id >= sc->sge.neq)
		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.neq - 1);
	sc->sge.eqmap[cntxt_id] = eq;

	return (rc);
}

static int
eth_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq)
{
	int rc, cntxt_id;
	struct fw_eq_eth_cmd c;
	int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE;

	bzero(&c, sizeof(c));

	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
	    V_FW_EQ_ETH_CMD_VFN(0));
	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
	c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE |
	    F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid));
	c.fetchszm_to_iqid = htobe32(
	    V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) |
	    V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
	    V_FW_EQ_ETH_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
	    V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
	    V_FW_EQ_ETH_CMD_EQSIZE(qsize));
	c.eqaddr = htobe64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(vi->dev,
		    "failed to create Ethernet egress queue: %d\n", rc);
		return (rc);
	}
	eq->flags |= EQ_ALLOCATED;

	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
	eq->abs_id = G_FW_EQ_ETH_CMD_PHYSEQID(be32toh(c.physeqid_pkd));
	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
	if (cntxt_id >= sc->sge.neq)
		panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
		    cntxt_id, sc->sge.neq - 1);
	sc->sge.eqmap[cntxt_id] = eq;

	return (rc);
}

#if defined(TCP_OFFLOAD) || defined(RATELIMIT)
static int
ofld_eq_alloc(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq)
{
	int rc, cntxt_id;
	struct fw_eq_ofld_cmd c;
	int qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE;

	bzero(&c, sizeof(c));

	c.op_to_vfn = htonl(V_FW_CMD_OP(FW_EQ_OFLD_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_OFLD_CMD_PFN(sc->pf) |
	    V_FW_EQ_OFLD_CMD_VFN(0));
	c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
	    F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
	c.fetchszm_to_iqid = htonl(
	    V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) |
	    V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_OFLD_CMD_FETCHRO |
	    V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
	c.dcaen_to_eqsize = htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
	    V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
	    V_FW_EQ_OFLD_CMD_EQSIZE(qsize));
	c.eqaddr = htobe64(eq->ba);

	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
	if (rc != 0) {
		device_printf(vi->dev,
		    "failed to create egress queue for TCP offload: %d\n", rc);
		return (rc);
	}
	eq->flags |= EQ_ALLOCATED;

	eq->cntxt_id = G_FW_EQ_OFLD_CMD_EQID(be32toh(c.eqid_pkd));
	cntxt_id =
eq->cntxt_id - sc->sge.eq_start; if (cntxt_id >= sc->sge.neq) panic("%s: eq->cntxt_id (%d) more than the max (%d)", __func__, cntxt_id, sc->sge.neq - 1); sc->sge.eqmap[cntxt_id] = eq; return (rc); } #endif static int alloc_eq(struct adapter *sc, struct vi_info *vi, struct sge_eq *eq) { int rc, qsize; size_t len; mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF); qsize = eq->sidx + sc->params.sge.spg_len / EQ_ESIZE; len = qsize * EQ_ESIZE; rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map, &eq->ba, (void **)&eq->desc); if (rc) return (rc); eq->pidx = eq->cidx = 0; eq->equeqidx = eq->dbidx = 0; eq->doorbells = sc->doorbells; switch (eq->flags & EQ_TYPEMASK) { case EQ_CTRL: rc = ctrl_eq_alloc(sc, eq); break; case EQ_ETH: rc = eth_eq_alloc(sc, vi, eq); break; #if defined(TCP_OFFLOAD) || defined(RATELIMIT) case EQ_OFLD: rc = ofld_eq_alloc(sc, vi, eq); break; #endif default: panic("%s: invalid eq type %d.", __func__, eq->flags & EQ_TYPEMASK); } if (rc != 0) { device_printf(sc->dev, "failed to allocate egress queue(%d): %d\n", eq->flags & EQ_TYPEMASK, rc); } if (isset(&eq->doorbells, DOORBELL_UDB) || isset(&eq->doorbells, DOORBELL_UDBWC) || isset(&eq->doorbells, DOORBELL_WCWR)) { uint32_t s_qpp = sc->params.sge.eq_s_qpp; uint32_t mask = (1 << s_qpp) - 1; volatile uint8_t *udb; udb = sc->udbs_base + UDBS_DB_OFFSET; udb += (eq->cntxt_id >> s_qpp) << PAGE_SHIFT; /* pg offset */ eq->udb_qid = eq->cntxt_id & mask; /* id in page */ if (eq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE) clrbit(&eq->doorbells, DOORBELL_WCWR); else { udb += eq->udb_qid << UDBS_SEG_SHIFT; /* seg offset */ eq->udb_qid = 0; } eq->udb = (volatile void *)udb; } return (rc); } static int free_eq(struct adapter *sc, struct sge_eq *eq) { int rc; if (eq->flags & EQ_ALLOCATED) { switch (eq->flags & EQ_TYPEMASK) { case EQ_CTRL: rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); break; case EQ_ETH: rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); break; #if defined(TCP_OFFLOAD) || defined(RATELIMIT) case EQ_OFLD: rc = -t4_ofld_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id); break; #endif default: panic("%s: invalid eq type %d.", __func__, eq->flags & EQ_TYPEMASK); } if (rc != 0) { device_printf(sc->dev, "failed to free egress queue (%d): %d\n", eq->flags & EQ_TYPEMASK, rc); return (rc); } eq->flags &= ~EQ_ALLOCATED; } free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc); if (mtx_initialized(&eq->eq_lock)) mtx_destroy(&eq->eq_lock); bzero(eq, sizeof(*eq)); return (0); } static int alloc_wrq(struct adapter *sc, struct vi_info *vi, struct sge_wrq *wrq, struct sysctl_oid *oid) { int rc; struct sysctl_ctx_list *ctx = vi ? 
&vi->ctx : &sc->ctx; struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); rc = alloc_eq(sc, vi, &wrq->eq); if (rc) return (rc); wrq->adapter = sc; TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq); TAILQ_INIT(&wrq->incomplete_wrs); STAILQ_INIT(&wrq->wr_list); wrq->nwr_pending = 0; wrq->ndesc_needed = 0; SYSCTL_ADD_UAUTO(ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &wrq->eq.ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, wrq->eq.sidx * EQ_ESIZE + sc->params.sge.spg_len, "desc ring size in bytes"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, &wrq->eq.cntxt_id, 0, "SGE context id of the queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.cidx, 0, sysctl_uint16, "I", "consumer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx", CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I", "producer index"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, wrq->eq.sidx, "status page index"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD, &wrq->tx_wrs_direct, "# of work requests (direct)"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD, &wrq->tx_wrs_copied, "# of work requests (copied)"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_sspace", CTLFLAG_RD, &wrq->tx_wrs_ss, "# of work requests (copied from scratch space)"); return (rc); } static int free_wrq(struct adapter *sc, struct sge_wrq *wrq) { int rc; rc = free_eq(sc, &wrq->eq); if (rc) return (rc); bzero(wrq, sizeof(*wrq)); return (0); } static int alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx, struct sysctl_oid *oid) { int rc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_eq *eq = &txq->eq; char name[16]; struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx, M_CXGBE, M_WAITOK); if (rc != 0) { device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc); return (rc); } rc = alloc_eq(sc, vi, eq); if (rc != 0) { mp_ring_free(txq->r); txq->r = NULL; return (rc); } /* Can't fail after this point. 
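	 * Everything below is bookkeeping and sysctl registration; the
	 * remaining allocations use M_WAITOK.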
*/ if (idx == 0) sc->sge.eq_base = eq->abs_id - eq->cntxt_id; else KASSERT(eq->cntxt_id + sc->sge.eq_base == eq->abs_id, ("eq_base mismatch")); KASSERT(sc->sge.eq_base == 0 || sc->flags & IS_VF, ("PF with non-zero eq_base")); TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); txq->ifp = vi->ifp; txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); if (sc->flags & IS_VF) txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | V_TXPKT_INTF(pi->tx_chan)); else txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) | V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) | V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid))); txq->tc_idx = -1; txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, M_ZERO | M_WAITOK); snprintf(name, sizeof(name), "%d", idx); oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name, CTLFLAG_RD, NULL, "tx queue"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UAUTO(&vi->ctx, children, OID_AUTO, "ba", CTLFLAG_RD, &eq->ba, "bus address of descriptor ring"); SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "dmalen", CTLFLAG_RD, NULL, eq->sidx * EQ_ESIZE + sc->params.sge.spg_len, "desc ring size in bytes"); SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "abs_id", CTLFLAG_RD, &eq->abs_id, 0, "absolute id of the queue"); SYSCTL_ADD_UINT(&vi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD, &eq->cntxt_id, 0, "SGE context id of the queue"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "cidx", CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I", "consumer index"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "pidx", CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I", "producer index"); SYSCTL_ADD_INT(&vi->ctx, children, OID_AUTO, "sidx", CTLFLAG_RD, NULL, eq->sidx, "status page index"); SYSCTL_ADD_PROC(&vi->ctx, children, OID_AUTO, "tc", CTLTYPE_INT | CTLFLAG_RW, vi, idx, sysctl_tc, "I", "traffic class (-1 means none)"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD, &txq->txcsum, "# of times hardware assisted with checksum"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_insertion", CTLFLAG_RD, &txq->vlan_insertion, "# of times hardware inserted 802.1Q tag"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD, &txq->tso_wrs, "# of TSO work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD, &txq->imm_wrs, "# of work requests with immediate data"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD, &txq->sgl_wrs, "# of work requests with direct SGL"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD, &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_wrs", CTLFLAG_RD, &txq->txpkts0_wrs, "# of txpkts (type 0) work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_wrs", CTLFLAG_RD, &txq->txpkts1_wrs, "# of txpkts (type 1) work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts0_pkts", CTLFLAG_RD, &txq->txpkts0_pkts, "# of frames tx'd using type0 txpkts work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "txpkts1_pkts", CTLFLAG_RD, &txq->txpkts1_pkts, "# of frames tx'd using type1 txpkts work requests"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->r->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->r->drops, "# of drops in the mp_ring for this queue"); 
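	/* Consumer state transitions in the mp_ring. */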
SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->r->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->r->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->r->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->r->abdications, "# of consumer abdications in the mp_ring for this queue"); return (0); } static int free_txq(struct vi_info *vi, struct sge_txq *txq) { int rc; struct adapter *sc = vi->pi->adapter; struct sge_eq *eq = &txq->eq; rc = free_eq(sc, eq); if (rc) return (rc); sglist_free(txq->gl); free(txq->sdesc, M_CXGBE); mp_ring_free(txq->r); bzero(txq, sizeof(*txq)); return (0); } static void oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *ba = arg; KASSERT(nseg == 1, ("%s meant for single segment mappings only.", __func__)); *ba = error ? 0 : segs->ds_addr; } static inline void ring_fl_db(struct adapter *sc, struct sge_fl *fl) { uint32_t n, v; n = IDXDIFF(fl->pidx / 8, fl->dbidx, fl->sidx); MPASS(n > 0); wmb(); v = fl->dbval | V_PIDX(n); if (fl->udb) *fl->udb = htole32(v); else t4_write_reg(sc, sc->sge_kdoorbell_reg, v); IDXINCR(fl->dbidx, n, fl->sidx); } /* * Fills up the freelist by allocating up to 'n' buffers. Buffers that are * recycled do not count towards this allocation budget. * * Returns non-zero to indicate that this freelist should be added to the list * of starving freelists. */ static int refill_fl(struct adapter *sc, struct sge_fl *fl, int n) { __be64 *d; struct fl_sdesc *sd; uintptr_t pa; caddr_t cl; struct cluster_layout *cll; struct sw_zone_info *swz; struct cluster_metadata *clm; uint16_t max_pidx; uint16_t hw_cidx = fl->hw_cidx; /* stable snapshot */ FL_LOCK_ASSERT_OWNED(fl); /* * We always stop at the beginning of the hardware descriptor that's just * before the one with the hw cidx. This is to avoid hw pidx = hw cidx, * which would mean an empty freelist to the chip. */ max_pidx = __predict_false(hw_cidx == 0) ? fl->sidx - 1 : hw_cidx - 1; if (fl->pidx == max_pidx * 8) return (0); d = &fl->desc[fl->pidx]; sd = &fl->sdesc[fl->pidx]; cll = &fl->cll_def; /* default layout */ swz = &sc->sge.sw_zone_info[cll->zidx]; while (n > 0) { if (sd->cl != NULL) { if (sd->nmbuf == 0) { /* * Fast recycle without involving any atomics on * the cluster's metadata (if the cluster has * metadata). This happens when all frames * received in the cluster were small enough to * fit within a single mbuf each. */ fl->cl_fast_recycled++; #ifdef INVARIANTS clm = cl_metadata(sc, fl, &sd->cll, sd->cl); if (clm != NULL) MPASS(clm->refcount == 1); #endif goto recycled_fast; } /* * Cluster is guaranteed to have metadata. Clusters * without metadata always take the fast recycle path * when they're recycled. 
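			 * If we hold the last reference, the cluster is
			 * reused as-is; otherwise give up the reference and
			 * allocate a fresh one.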
*/ clm = cl_metadata(sc, fl, &sd->cll, sd->cl); MPASS(clm != NULL); if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { fl->cl_recycled++; counter_u64_add(extfree_rels, 1); goto recycled; } sd->cl = NULL; /* gave up my reference */ } MPASS(sd->cl == NULL); alloc: cl = uma_zalloc(swz->zone, M_NOWAIT); if (__predict_false(cl == NULL)) { if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 || fl->cll_def.zidx == fl->cll_alt.zidx) break; /* fall back to the safe zone */ cll = &fl->cll_alt; swz = &sc->sge.sw_zone_info[cll->zidx]; goto alloc; } fl->cl_allocated++; n--; pa = pmap_kextract((vm_offset_t)cl); pa += cll->region1; sd->cl = cl; sd->cll = *cll; *d = htobe64(pa | cll->hwidx); clm = cl_metadata(sc, fl, cll, cl); if (clm != NULL) { recycled: #ifdef INVARIANTS clm->sd = sd; #endif clm->refcount = 1; } sd->nmbuf = 0; recycled_fast: d++; sd++; if (__predict_false(++fl->pidx % 8 == 0)) { uint16_t pidx = fl->pidx / 8; if (__predict_false(pidx == fl->sidx)) { fl->pidx = 0; pidx = 0; sd = fl->sdesc; d = fl->desc; } if (pidx == max_pidx) break; if (IDXDIFF(pidx, fl->dbidx, fl->sidx) >= 4) ring_fl_db(sc, fl); } } if (fl->pidx / 8 != fl->dbidx) ring_fl_db(sc, fl); return (FL_RUNNING_LOW(fl) && !(fl->flags & FL_STARVING)); } /* * Attempt to refill all starving freelists. */ static void refill_sfl(void *arg) { struct adapter *sc = arg; struct sge_fl *fl, *fl_temp; mtx_assert(&sc->sfl_lock, MA_OWNED); TAILQ_FOREACH_SAFE(fl, &sc->sfl, link, fl_temp) { FL_LOCK(fl); refill_fl(sc, fl, 64); if (FL_NOT_RUNNING_LOW(fl) || fl->flags & FL_DOOMED) { TAILQ_REMOVE(&sc->sfl, fl, link); fl->flags &= ~FL_STARVING; } FL_UNLOCK(fl); } if (!TAILQ_EMPTY(&sc->sfl)) callout_schedule(&sc->sfl_callout, hz / 5); } static int alloc_fl_sdesc(struct sge_fl *fl) { fl->sdesc = malloc(fl->sidx * 8 * sizeof(struct fl_sdesc), M_CXGBE, M_ZERO | M_WAITOK); return (0); } static void free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) { struct fl_sdesc *sd; struct cluster_metadata *clm; struct cluster_layout *cll; int i; sd = fl->sdesc; for (i = 0; i < fl->sidx * 8; i++, sd++) { if (sd->cl == NULL) continue; cll = &sd->cll; clm = cl_metadata(sc, fl, cll, sd->cl); if (sd->nmbuf == 0) uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); else if (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1) { uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); counter_u64_add(extfree_rels, 1); } sd->cl = NULL; } free(fl->sdesc, M_CXGBE); fl->sdesc = NULL; } static inline void get_pkt_gl(struct mbuf *m, struct sglist *gl) { int rc; M_ASSERTPKTHDR(m); sglist_reset(gl); rc = sglist_append_mbuf(gl, m); if (__predict_false(rc != 0)) { panic("%s: mbuf %p (%d segs) was vetted earlier but now fails " "with %d.", __func__, m, mbuf_nsegs(m), rc); } KASSERT(gl->sg_nseg == mbuf_nsegs(m), ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m, mbuf_nsegs(m), gl->sg_nseg)); KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS), ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__, gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)); } /* * len16 for a txpkt WR with a GL. Includes the firmware work request header. */ static inline u_int txpkt_len16(u_int nsegs, u_int tso) { u_int n; MPASS(nsegs > 0); nsegs--; /* first segment is part of ulptx_sgl */ n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); if (tso) n += sizeof(struct cpl_tx_pkt_lso_core); return (howmany(n, 16)); } /* * len16 for a txpkt_vm WR with a GL. 
Includes the firmware work * request header. */ static inline u_int txpkt_vm_len16(u_int nsegs, u_int tso) { u_int n; MPASS(nsegs > 0); nsegs--; /* first segment is part of ulptx_sgl */ n = sizeof(struct fw_eth_tx_pkt_vm_wr) + sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); if (tso) n += sizeof(struct cpl_tx_pkt_lso_core); return (howmany(n, 16)); } /* * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work * request header. */ static inline u_int txpkts0_len16(u_int nsegs) { u_int n; MPASS(nsegs > 0); nsegs--; /* first segment is part of ulptx_sgl */ n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) + sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); return (howmany(n, 16)); } /* * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work * request header. */ static inline u_int txpkts1_len16(void) { u_int n; n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl); return (howmany(n, 16)); } static inline u_int imm_payload(u_int ndesc) { u_int n; n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) - sizeof(struct cpl_tx_pkt_core); return (n); } /* * Write a VM txpkt WR for this packet to the hardware descriptors, update the * software descriptor, and advance the pidx. It is guaranteed that enough * descriptors are available. * * The return value is the # of hardware descriptors used. */ static u_int write_txpkt_vm_wr(struct adapter *sc, struct sge_txq *txq, struct fw_eth_tx_pkt_vm_wr *wr, struct mbuf *m0, u_int available) { struct sge_eq *eq = &txq->eq; struct tx_sdesc *txsd; struct cpl_tx_pkt_core *cpl; uint32_t ctrl; /* used in many unrelated places */ uint64_t ctrl1; int csum_type, len16, ndesc, pktlen, nsegs; caddr_t dst; TXQ_LOCK_ASSERT_OWNED(txq); M_ASSERTPKTHDR(m0); MPASS(available > 0 && available < eq->sidx); len16 = mbuf_len16(m0); nsegs = mbuf_nsegs(m0); pktlen = m0->m_pkthdr.len; ctrl = sizeof(struct cpl_tx_pkt_core); if (needs_tso(m0)) ctrl += sizeof(struct cpl_tx_pkt_lso_core); ndesc = howmany(len16, EQ_ESIZE / 16); MPASS(ndesc <= available); /* Firmware work request header */ MPASS(wr == (void *)&eq->desc[eq->pidx]); wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) | V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); ctrl = V_FW_WR_LEN16(len16); wr->equiq_to_len16 = htobe32(ctrl); wr->r3[0] = 0; wr->r3[1] = 0; /* * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci. * vlantci is ignored unless the ethtype is 0x8100, so it's * simpler to always copy it rather than making it * conditional. Also, it seems that we do not have to set * vlantci or fake the ethtype when doing VLAN tag insertion. 
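	 * (The extra two bytes are the vlantci field.)
	 */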
	 */
	m_copydata(m0, 0, sizeof(struct ether_header) + 2, wr->ethmacdst);

	csum_type = -1;
	if (needs_tso(m0)) {
		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);

		KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
		    m0->m_pkthdr.l4hlen > 0,
		    ("%s: mbuf %p needs TSO but missing header lengths",
			__func__, m0));

		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
		    F_LSO_LAST_SLICE |
		    V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
		    V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
		if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header))
			ctrl |= V_LSO_ETHHDR_LEN(1);
		if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
			ctrl |= F_LSO_IPV6;

		lso->lso_ctrl = htobe32(ctrl);
		lso->ipid_ofst = htobe16(0);
		lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
		lso->seqno_offset = htobe32(0);
		lso->len = htobe32(pktlen);

		if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
			csum_type = TX_CSUM_TCPIP6;
		else
			csum_type = TX_CSUM_TCPIP;

		cpl = (void *)(lso + 1);

		txq->tso_wrs++;
	} else {
		if (m0->m_pkthdr.csum_flags & CSUM_IP_TCP)
			csum_type = TX_CSUM_TCPIP;
		else if (m0->m_pkthdr.csum_flags & CSUM_IP_UDP)
			csum_type = TX_CSUM_UDPIP;
		else if (m0->m_pkthdr.csum_flags & CSUM_IP6_TCP)
			csum_type = TX_CSUM_TCPIP6;
		else if (m0->m_pkthdr.csum_flags & CSUM_IP6_UDP)
			csum_type = TX_CSUM_UDPIP6;
#if defined(INET)
		else if (m0->m_pkthdr.csum_flags & CSUM_IP) {
			/*
			 * XXX: The firmware appears to stomp on the
			 * fragment/flags field of the IP header when
			 * using TX_CSUM_IP.  Fall back to doing
			 * software checksums.
			 */
			u_short *sump;
			struct mbuf *m;
			int offset;

			m = m0;
			offset = 0;
			sump = m_advance(&m, &offset, m0->m_pkthdr.l2hlen +
			    offsetof(struct ip, ip_sum));
			*sump = in_cksum_skip(m0, m0->m_pkthdr.l2hlen +
			    m0->m_pkthdr.l3hlen, m0->m_pkthdr.l2hlen);
			m0->m_pkthdr.csum_flags &= ~CSUM_IP;
		}
#endif

		cpl = (void *)(wr + 1);
	}

	/* Checksum offload */
	ctrl1 = 0;
	if (needs_l3_csum(m0) == 0)
		ctrl1 |= F_TXPKT_IPCSUM_DIS;
	if (csum_type >= 0) {
		KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0,
		    ("%s: mbuf %p needs checksum offload but missing header "
			"lengths", __func__, m0));

		if (chip_id(sc) <= CHELSIO_T5) {
			ctrl1 |= V_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen -
			    ETHER_HDR_LEN);
		} else {
			ctrl1 |= V_T6_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen -
			    ETHER_HDR_LEN);
		}
		ctrl1 |= V_TXPKT_IPHDR_LEN(m0->m_pkthdr.l3hlen);
		ctrl1 |= V_TXPKT_CSUM_TYPE(csum_type);
	} else
		ctrl1 |= F_TXPKT_L4CSUM_DIS;

	if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
		txq->txcsum++;	/* some hardware assistance provided */

	/* VLAN tag insertion */
	if (needs_vlan_insertion(m0)) {
		ctrl1 |= F_TXPKT_VLAN_VLD |
		    V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
		txq->vlan_insertion++;
	}

	/* CPL header */
	cpl->ctrl0 = txq->cpl_ctrl0;
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);
	cpl->ctrl1 = htobe64(ctrl1);

	/* SGL */
	dst = (void *)(cpl + 1);

	/*
	 * A packet using TSO will use up an entire descriptor for the
	 * firmware work request header, LSO CPL, and TX_PKT_XT CPL.
	 * If this descriptor is the last descriptor in the ring, wrap
	 * around to the front of the ring explicitly for the start of
	 * the sgl.
	 */
	if (dst == (void *)&eq->desc[eq->sidx]) {
		dst = (void *)&eq->desc[0];
		write_gl_to_txd(txq, m0, &dst, 0);
	} else
		write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx);
	txq->sgl_wrs++;

	txq->txpkt_wrs++;

	txsd = &txq->sdesc[eq->pidx];
	txsd->m = m0;
	txsd->desc_used = ndesc;

	return (ndesc);
}

/*
 * Write a txpkt WR for this packet to the hardware descriptors, update the
 * software descriptor, and advance the pidx.  It is guaranteed that enough
 * descriptors are available.
 *
 * The return value is the # of hardware descriptors used.
 */
static u_int
write_txpkt_wr(struct sge_txq *txq, struct fw_eth_tx_pkt_wr *wr,
    struct mbuf *m0, u_int available)
{
	struct sge_eq *eq = &txq->eq;
	struct tx_sdesc *txsd;
	struct cpl_tx_pkt_core *cpl;
	uint32_t ctrl;	/* used in many unrelated places */
	uint64_t ctrl1;
	int len16, ndesc, pktlen, nsegs;
	caddr_t dst;

	TXQ_LOCK_ASSERT_OWNED(txq);
	M_ASSERTPKTHDR(m0);
	MPASS(available > 0 && available < eq->sidx);

	len16 = mbuf_len16(m0);
	nsegs = mbuf_nsegs(m0);
	pktlen = m0->m_pkthdr.len;
	ctrl = sizeof(struct cpl_tx_pkt_core);
	if (needs_tso(m0))
		ctrl += sizeof(struct cpl_tx_pkt_lso_core);
	else if (pktlen <= imm_payload(2) && available >= 2) {
		/* Immediate data.  Recalculate len16 and set nsegs to 0. */
		ctrl += pktlen;
		len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) +
		    sizeof(struct cpl_tx_pkt_core) + pktlen, 16);
		nsegs = 0;
	}
	ndesc = howmany(len16, EQ_ESIZE / 16);
	MPASS(ndesc <= available);

	/* Firmware work request header */
	MPASS(wr == (void *)&eq->desc[eq->pidx]);
	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));

	ctrl = V_FW_WR_LEN16(len16);
	wr->equiq_to_len16 = htobe32(ctrl);
	wr->r3 = 0;

	if (needs_tso(m0)) {
		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);

		KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
		    m0->m_pkthdr.l4hlen > 0,
		    ("%s: mbuf %p needs TSO but missing header lengths",
			__func__, m0));

		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
		    F_LSO_LAST_SLICE |
		    V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
		    V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
		if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header))
			ctrl |= V_LSO_ETHHDR_LEN(1);
		if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
			ctrl |= F_LSO_IPV6;

		lso->lso_ctrl = htobe32(ctrl);
		lso->ipid_ofst = htobe16(0);
		lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
		lso->seqno_offset = htobe32(0);
		lso->len = htobe32(pktlen);

		cpl = (void *)(lso + 1);

		txq->tso_wrs++;
	} else
		cpl = (void *)(wr + 1);

	/* Checksum offload */
	ctrl1 = 0;
	if (needs_l3_csum(m0) == 0)
		ctrl1 |= F_TXPKT_IPCSUM_DIS;
	if (needs_l4_csum(m0) == 0)
		ctrl1 |= F_TXPKT_L4CSUM_DIS;
	if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
		txq->txcsum++;	/* some hardware assistance provided */

	/* VLAN tag insertion */
	if (needs_vlan_insertion(m0)) {
		ctrl1 |= F_TXPKT_VLAN_VLD |
		    V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
		txq->vlan_insertion++;
	}

	/* CPL header */
	cpl->ctrl0 = txq->cpl_ctrl0;
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);
	cpl->ctrl1 = htobe64(ctrl1);

	/* SGL */
	dst = (void *)(cpl + 1);
	if (nsegs > 0) {

		write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx);
		txq->sgl_wrs++;
	} else {
		struct mbuf *m;

		for (m = m0; m != NULL; m = m->m_next) {
			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
#ifdef INVARIANTS
			pktlen -= m->m_len;
#endif
		}
#ifdef INVARIANTS
		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
#endif
		txq->imm_wrs++;
	}

	txq->txpkt_wrs++;

	txsd = &txq->sdesc[eq->pidx];
	txsd->m = m0;
	txsd->desc_used = ndesc;

	return (ndesc);
}
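/*
 * [Editor's illustration, hedged] write_txpkt_wr() above inlines the whole
 * frame ("immediate data") instead of pointing at it with an SGL whenever
 * the payload fits in what is left of two descriptors after the firmware
 * header and the CPL.  A sketch of imm_payload() with assumed sizes (the
 * driver takes EQ_ESIZE and the struct sizes from its own headers):
 */
#if 0	/* illustrative userland sketch, not driver code */
#include <stdio.h>

#define EQ_ESIZE_EX	64	/* bytes per hardware descriptor (assumed) */
#define FW_HDR_LEN	16	/* fw_eth_tx_pkt_wr (assumed) */
#define CPL_LEN		16	/* cpl_tx_pkt_core (assumed) */

/* Mirror of imm_payload(): inline bytes that fit in ndesc descriptors. */
static unsigned
imm_payload_ex(unsigned ndesc)
{
	return (ndesc * EQ_ESIZE_EX - FW_HDR_LEN - CPL_LEN);
}

int
main(void)
{
	/* the immediate-data path triggers when pktlen <= imm_payload(2) */
	printf("imm_payload(2) = %u bytes\n", imm_payload_ex(2));
	return (0);
}
#endif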
static int
try_txpkts(struct mbuf *m, struct mbuf *n, struct txpkts *txp, u_int available)
{
	u_int needed, nsegs1, nsegs2, l1, l2;

	if (cannot_use_txpkts(m) || cannot_use_txpkts(n))
		return (1);

	nsegs1 = mbuf_nsegs(m);
	nsegs2 = mbuf_nsegs(n);
	if (nsegs1 + nsegs2 == 2) {
		txp->wr_type = 1;
		l1 = l2 = txpkts1_len16();
	} else {
		txp->wr_type = 0;
		l1 = txpkts0_len16(nsegs1);
		l2 = txpkts0_len16(nsegs2);
	}
	txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + l1 + l2;
	needed = howmany(txp->len16, EQ_ESIZE / 16);
	if (needed > SGE_MAX_WR_NDESC || needed > available)
		return (1);

	txp->plen = m->m_pkthdr.len + n->m_pkthdr.len;
	if (txp->plen > 65535)
		return (1);

	txp->npkt = 2;
	set_mbuf_len16(m, l1);
	set_mbuf_len16(n, l2);

	return (0);
}

static int
add_to_txpkts(struct mbuf *m, struct txpkts *txp, u_int available)
{
	u_int plen, len16, needed, nsegs;

	MPASS(txp->wr_type == 0 || txp->wr_type == 1);

	nsegs = mbuf_nsegs(m);
	if (needs_tso(m) || (txp->wr_type == 1 && nsegs != 1))
		return (1);

	plen = txp->plen + m->m_pkthdr.len;
	if (plen > 65535)
		return (1);

	if (txp->wr_type == 0)
		len16 = txpkts0_len16(nsegs);
	else
		len16 = txpkts1_len16();
	needed = howmany(txp->len16 + len16, EQ_ESIZE / 16);
	if (needed > SGE_MAX_WR_NDESC || needed > available)
		return (1);

	txp->npkt++;
	txp->plen = plen;
	txp->len16 += len16;
	set_mbuf_len16(m, len16);

	return (0);
}
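/*
 * [Editor's illustration, hedged] try_txpkts()/add_to_txpkts() above
 * coalesce small frames into one txpkts WR: type 1 is only for
 * single-segment packets, the summed payload must stay under 65536
 * because the WR's plen field is 16 bits, and the WR may not grow past
 * SGE_MAX_WR_NDESC descriptors.  A toy admission check with stand-in
 * constants:
 */
#if 0	/* illustrative userland sketch, not driver code */
#include <stdio.h>

struct txpkts_sim {
	unsigned npkt, plen, len16;
};
#define WR_PLEN_MAX	65535	/* plen is a 16-bit field in the WR */
#define NDESC_MAX	8	/* stand-in for SGE_MAX_WR_NDESC (assumed) */
#define DESC_LEN16	4	/* EQ_ESIZE / 16 with 64-byte descriptors */

/* Can one more packet (pkt_len bytes, pkt_len16 credits) join the WR? */
static int
can_add(struct txpkts_sim *txp, unsigned pkt_len, unsigned pkt_len16,
    unsigned avail_desc)
{
	unsigned needed = (txp->len16 + pkt_len16 + DESC_LEN16 - 1) /
	    DESC_LEN16;

	if (txp->plen + pkt_len > WR_PLEN_MAX)
		return (0);	/* would overflow the WR's 16-bit plen */
	if (needed > NDESC_MAX || needed > avail_desc)
		return (0);	/* WR too large or ring too full */
	txp->npkt++;
	txp->plen += pkt_len;
	txp->len16 += pkt_len16;
	return (1);
}

int
main(void)
{
	struct txpkts_sim txp = { 2, 3000, 8 };	/* two packets queued */

	printf("add 1500B pkt: %s\n",
	    can_add(&txp, 1500, 2, 16) ? "coalesced" : "flush first");
	return (0);
}
#endif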
/*
 * Write a txpkts WR for the packets in txp to the hardware descriptors, update
 * the software descriptor, and advance the pidx.  It is guaranteed that enough
 * descriptors are available.
 *
 * The return value is the # of hardware descriptors used.
 */
static u_int
write_txpkts_wr(struct sge_txq *txq, struct fw_eth_tx_pkts_wr *wr,
    struct mbuf *m0, const struct txpkts *txp, u_int available)
{
	struct sge_eq *eq = &txq->eq;
	struct tx_sdesc *txsd;
	struct cpl_tx_pkt_core *cpl;
	uint32_t ctrl;
	uint64_t ctrl1;
	int ndesc, checkwrap;
	struct mbuf *m;
	void *flitp;

	TXQ_LOCK_ASSERT_OWNED(txq);
	MPASS(txp->npkt > 0);
	MPASS(txp->plen < 65536);
	MPASS(m0 != NULL);
	MPASS(m0->m_nextpkt != NULL);
	MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16));
	MPASS(available > 0 && available < eq->sidx);

	ndesc = howmany(txp->len16, EQ_ESIZE / 16);
	MPASS(ndesc <= available);

	MPASS(wr == (void *)&eq->desc[eq->pidx]);
	wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
	ctrl = V_FW_WR_LEN16(txp->len16);
	wr->equiq_to_len16 = htobe32(ctrl);
	wr->plen = htobe16(txp->plen);
	wr->npkt = txp->npkt;
	wr->r3 = 0;
	wr->type = txp->wr_type;
	flitp = wr + 1;

	/*
	 * At this point we are 16B into a hardware descriptor.  If checkwrap is
	 * set then we know the WR is going to wrap around somewhere.  We'll
	 * check for that at appropriate points.
	 */
	checkwrap = eq->sidx - ndesc < eq->pidx;
	for (m = m0; m != NULL; m = m->m_nextpkt) {
		if (txp->wr_type == 0) {
			struct ulp_txpkt *ulpmc;
			struct ulptx_idata *ulpsc;

			/* ULP master command */
			ulpmc = flitp;
			ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
			    V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid));
			ulpmc->len = htobe32(mbuf_len16(m));

			/* ULP subcommand */
			ulpsc = (void *)(ulpmc + 1);
			ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
			    F_ULP_TX_SC_MORE);
			ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));

			cpl = (void *)(ulpsc + 1);
			if (checkwrap &&
			    (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx])
				cpl = (void *)&eq->desc[0];
		} else {
			cpl = flitp;
		}

		/* Checksum offload */
		ctrl1 = 0;
		if (needs_l3_csum(m) == 0)
			ctrl1 |= F_TXPKT_IPCSUM_DIS;
		if (needs_l4_csum(m) == 0)
			ctrl1 |= F_TXPKT_L4CSUM_DIS;
		if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
		    CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
			txq->txcsum++;	/* some hardware assistance provided */

		/* VLAN tag insertion */
		if (needs_vlan_insertion(m)) {
			ctrl1 |= F_TXPKT_VLAN_VLD |
			    V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
			txq->vlan_insertion++;
		}

		/* CPL header */
		cpl->ctrl0 = txq->cpl_ctrl0;
		cpl->pack = 0;
		cpl->len = htobe16(m->m_pkthdr.len);
		cpl->ctrl1 = htobe64(ctrl1);

		flitp = cpl + 1;
		if (checkwrap &&
		    (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx])
			flitp = (void *)&eq->desc[0];

		write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap);
	}

	if (txp->wr_type == 0) {
		txq->txpkts0_pkts += txp->npkt;
		txq->txpkts0_wrs++;
	} else {
		txq->txpkts1_pkts += txp->npkt;
		txq->txpkts1_wrs++;
	}

	txsd = &txq->sdesc[eq->pidx];
	txsd->m = m0;
	txsd->desc_used = ndesc;

	return (ndesc);
}

/*
 * If the SGL ends on an address that is not 16 byte aligned, this function will
 * add a 0 filled flit at the end.
 */
static void
write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap)
{
	struct sge_eq *eq = &txq->eq;
	struct sglist *gl = txq->gl;
	struct sglist_seg *seg;
	__be64 *flitp, *wrap;
	struct ulptx_sgl *usgl;
	int i, nflits, nsegs;

	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
	MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
	MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);

	get_pkt_gl(m, gl);
	nsegs = gl->sg_nseg;
	MPASS(nsegs > 0);

	nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2;
	flitp = (__be64 *)(*to);
	wrap = (__be64 *)(&eq->desc[eq->sidx]);
	seg = &gl->sg_segs[0];
	usgl = (void *)flitp;

	/*
	 * We start at a 16 byte boundary somewhere inside the tx descriptor
	 * ring, so we're at least 16 bytes away from the status page.  There is
	 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
	 */
	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
	    V_ULPTX_NSGE(nsegs));
	usgl->len0 = htobe32(seg->ss_len);
	usgl->addr0 = htobe64(seg->ss_paddr);
	seg++;

	if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) {

		/* Won't wrap around at all */

		for (i = 0; i < nsegs - 1; i++, seg++) {
			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len);
			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr);
		}
		if (i & 1)
			usgl->sge[i / 2].len[1] = htobe32(0);
		flitp += nflits;
	} else {

		/* Will wrap somewhere in the rest of the SGL */

		/* 2 flits already written, write the rest flit by flit */
		flitp = (void *)(usgl + 1);
		for (i = 0; i < nflits - 2; i++) {
			if (flitp == wrap)
				flitp = (void *)eq->desc;
			*flitp++ = get_flit(seg, nsegs - 1, i);
		}
	}

	if (nflits & 1) {
		MPASS(((uintptr_t)flitp) & 0xf);
		*flitp++ = 0;
	}

	MPASS((((uintptr_t)flitp) & 0xf) == 0);
	if (__predict_false(flitp == wrap))
		*to = (void *)eq->desc;
	else
		*to = (void *)flitp;
}

static inline void
copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
{

	MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
	MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);

	if (__predict_true((uintptr_t)(*to) + len <=
	    (uintptr_t)&eq->desc[eq->sidx])) {
		bcopy(from, *to, len);
		(*to) += len;
	} else {
		int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to);

		bcopy(from, *to, portion);
		from += portion;
		portion = len - portion;	/* remaining */
		bcopy(from, (void *)eq->desc, portion);
		(*to) = (caddr_t)eq->desc + portion;
	}
}

static inline void
ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n)
{
	u_int db;

	MPASS(n > 0);

	db = eq->doorbells;
	if (n > 1)
		clrbit(&db, DOORBELL_WCWR);
	wmb();

	switch (ffs(db) - 1) {
	case DOORBELL_UDB:
		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n));
		break;

	case DOORBELL_WCWR: {
		volatile uint64_t *dst, *src;
		int i;

		/*
		 * Queues whose 128B doorbell segment fits in the page do not
		 * use relative qid (udb_qid is always 0).  Only queues with
		 * doorbell segments can do WCWR.
		 */
		KASSERT(eq->udb_qid == 0 && n == 1,
		    ("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p",
		    __func__, eq->doorbells, n, eq->dbidx, eq));

		dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET -
		    UDBS_DB_OFFSET);
		i = eq->dbidx;
		src = (void *)&eq->desc[i];
		while (src != (void *)&eq->desc[i + 1])
			*dst++ = *src++;
		wmb();
		break;
	}

	case DOORBELL_UDBWC:
		*eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n));
		wmb();
		break;

	case DOORBELL_KDB:
		t4_write_reg(sc, sc->sge_kdoorbell_reg,
		    V_QID(eq->cntxt_id) | V_PIDX(n));
		break;
	}

	IDXINCR(eq->dbidx, n, eq->sidx);
}

static inline u_int
reclaimable_tx_desc(struct sge_eq *eq)
{
	uint16_t hw_cidx;

	hw_cidx = read_hw_cidx(eq);
	return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx));
}

static inline u_int
total_available_tx_desc(struct sge_eq *eq)
{
	uint16_t hw_cidx, pidx;

	hw_cidx = read_hw_cidx(eq);
	pidx = eq->pidx;

	if (pidx == hw_cidx)
		return (eq->sidx - 1);
	else
		return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1);
}

static inline uint16_t
read_hw_cidx(struct sge_eq *eq)
{
	struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
	uint16_t cidx = spg->cidx;	/* stable snapshot */

	return (be16toh(cidx));
}
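/*
 * [Editor's illustration, hedged] reclaimable_tx_desc() and
 * total_available_tx_desc() above are plain circular-ring arithmetic:
 * IDXDIFF() is the forward distance from one index to another modulo the
 * ring size, and one slot is always left unused so that a full ring is
 * distinguishable from an empty one (pidx == cidx).  A sketch:
 */
#if 0	/* illustrative userland sketch, not driver code */
#include <stdio.h>

/* forward distance from tail to head in a ring of 'size' slots */
static unsigned
idxdiff(unsigned head, unsigned tail, unsigned size)
{
	return (head >= tail ? head - tail : size + head - tail);
}

int
main(void)
{
	unsigned sidx = 512;	/* ring size */

	/* hw consumer at 10, sw producer at 500: 490 descriptors in use */
	printf("in use: %u\n", idxdiff(500, 10, sidx));
	/* producer == consumer means empty; at most sidx - 1 are usable */
	printf("usable when empty: %u\n", sidx - 1);
	return (0);
}
#endif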
/*
 * Reclaim 'n' descriptors approximately.
 */
static u_int
reclaim_tx_descs(struct sge_txq *txq, u_int n)
{
	struct tx_sdesc *txsd;
	struct sge_eq *eq = &txq->eq;
	u_int can_reclaim, reclaimed;

	TXQ_LOCK_ASSERT_OWNED(txq);
	MPASS(n > 0);

	reclaimed = 0;
	can_reclaim = reclaimable_tx_desc(eq);
	while (can_reclaim && reclaimed < n) {
		int ndesc;
		struct mbuf *m, *nextpkt;

		txsd = &txq->sdesc[eq->cidx];
		ndesc = txsd->desc_used;

		/* Firmware doesn't return "partial" credits. */
		KASSERT(can_reclaim >= ndesc,
		    ("%s: unexpected number of credits: %d, %d",
		    __func__, can_reclaim, ndesc));

		for (m = txsd->m; m != NULL; m = nextpkt) {
			nextpkt = m->m_nextpkt;
			m->m_nextpkt = NULL;
			m_freem(m);
		}
		reclaimed += ndesc;
		can_reclaim -= ndesc;
		IDXINCR(eq->cidx, ndesc, eq->sidx);
	}

	return (reclaimed);
}

static void
tx_reclaim(void *arg, int n)
{
	struct sge_txq *txq = arg;
	struct sge_eq *eq = &txq->eq;

	do {
		if (TXQ_TRYLOCK(txq) == 0)
			break;
		n = reclaim_tx_descs(txq, 32);
		if (eq->cidx == eq->pidx)
			eq->equeqidx = eq->pidx;
		TXQ_UNLOCK(txq);
	} while (n > 0);
}

static __be64
get_flit(struct sglist_seg *segs, int nsegs, int idx)
{
	int i = (idx / 3) * 2;

	switch (idx % 3) {
	case 0: {
		uint64_t rc;

		rc = (uint64_t)segs[i].ss_len << 32;
		if (i + 1 < nsegs)
			rc |= (uint64_t)(segs[i + 1].ss_len);

		return (htobe64(rc));
	}
	case 1:
		return (htobe64(segs[i].ss_paddr));
	case 2:
		return (htobe64(segs[i + 1].ss_paddr));
	}

	return (0);
}
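/*
 * [Editor's illustration, hedged] get_flit() above serializes SGL segments
 * two at a time into three 8-byte flits: flit 0 carries both lengths (the
 * even segment in the high word), flits 1 and 2 carry the two bus
 * addresses.  The same packing, minus the byte swapping, over a stand-in
 * segment type:
 */
#if 0	/* illustrative userland sketch, not driver code */
#include <stdint.h>
#include <stdio.h>

/* stand-in for struct sglist_seg */
struct seg { uint64_t paddr; uint32_t len; };

/* every 2 segments -> 3 flits: (len|len), addr, addr */
static uint64_t
flit(const struct seg *segs, int nsegs, int idx)
{
	int i = (idx / 3) * 2;

	switch (idx % 3) {
	case 0:
		return (((uint64_t)segs[i].len << 32) |
		    (i + 1 < nsegs ? segs[i + 1].len : 0));
	case 1:
		return (segs[i].paddr);
	default:
		return (segs[i + 1].paddr);
	}
}

int
main(void)
{
	struct seg segs[2] = { { 0x1000, 64 }, { 0x2000, 128 } };
	int i;

	for (i = 0; i < 3; i++)
		printf("flit %d = %#018llx\n", i,
		    (unsigned long long)flit(segs, 2, i));
	return (0);
}
#endif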
static void
find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp)
{
	int8_t zidx, hwidx, idx;
	uint16_t region1, region3;
	int spare, spare_needed, n;
	struct sw_zone_info *swz;
	struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0];

	/*
	 * Buffer Packing: Look for PAGE_SIZE or larger zone which has a bufsize
	 * large enough for the max payload and cluster metadata.  Otherwise
	 * settle for the largest bufsize that leaves enough room in the cluster
	 * for metadata.
	 *
	 * Without buffer packing: Look for the smallest zone which has a
	 * bufsize large enough for the max payload.  Settle for the largest
	 * bufsize available if there's nothing big enough for max payload.
	 */
	spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0;
	swz = &sc->sge.sw_zone_info[0];
	hwidx = -1;
	for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) {
		if (swz->size > largest_rx_cluster) {
			if (__predict_true(hwidx != -1))
				break;

			/*
			 * This is a misconfiguration.  largest_rx_cluster is
			 * preventing us from finding a refill source.  See
			 * dev.t5nex..buffer_sizes to figure out why.
			 */
			device_printf(sc->dev, "largest_rx_cluster=%u leaves no"
			    " refill source for fl %p (dma %u).  Ignored.\n",
			    largest_rx_cluster, fl, maxp);
		}
		for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) {
			hwb = &hwb_list[idx];
			spare = swz->size - hwb->size;
			if (spare < spare_needed)
				continue;

			hwidx = idx;	/* best option so far */
			if (hwb->size >= maxp) {

				if ((fl->flags & FL_BUF_PACKING) == 0)
					goto done; /* stop looking (not packing) */

				if (swz->size >= safest_rx_cluster)
					goto done; /* stop looking (packing) */
			}
			break;	/* keep looking, next zone */
		}
	}
done:
	/* A usable hwidx has been located. */
	MPASS(hwidx != -1);
	hwb = &hwb_list[hwidx];
	zidx = hwb->zidx;
	swz = &sc->sge.sw_zone_info[zidx];
	region1 = 0;
	region3 = swz->size - hwb->size;

	/*
	 * Stay within this zone and see if there is a better match when mbuf
	 * inlining is allowed.  Remember that the hwidx's are sorted in
	 * decreasing order of size (so in increasing order of spare area).
	 */
	for (idx = hwidx; idx != -1; idx = hwb->next) {
		hwb = &hwb_list[idx];
		spare = swz->size - hwb->size;

		if (allow_mbufs_in_cluster == 0 || hwb->size < maxp)
			break;

		/*
		 * Do not inline mbufs if doing so would violate the pad/pack
		 * boundary alignment requirement.
		 */
		if (fl_pad && (MSIZE % sc->params.sge.pad_boundary) != 0)
			continue;
		if (fl->flags & FL_BUF_PACKING &&
		    (MSIZE % sc->params.sge.pack_boundary) != 0)
			continue;

		if (spare < CL_METADATA_SIZE + MSIZE)
			continue;
		n = (spare - CL_METADATA_SIZE) / MSIZE;
		if (n > howmany(hwb->size, maxp))
			break;

		hwidx = idx;
		if (fl->flags & FL_BUF_PACKING) {
			region1 = n * MSIZE;
			region3 = spare - region1;
		} else {
			region1 = MSIZE;
			region3 = spare - region1;
			break;
		}
	}

	KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES,
	    ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp));
	KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES,
	    ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp));
	KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 ==
	    sc->sge.sw_zone_info[zidx].size,
	    ("%s: bad buffer layout for fl %p, maxp %d. "
		"cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
		sc->sge.sw_zone_info[zidx].size, region1,
		sc->sge.hw_buf_info[hwidx].size, region3));
	if (fl->flags & FL_BUF_PACKING || region1 > 0) {
		KASSERT(region3 >= CL_METADATA_SIZE,
		    ("%s: no room for metadata.  fl %p, maxp %d; "
			"cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
			sc->sge.sw_zone_info[zidx].size, region1,
			sc->sge.hw_buf_info[hwidx].size, region3));
		KASSERT(region1 % MSIZE == 0,
		    ("%s: bad mbuf region for fl %p, maxp %d. "
			"cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
			sc->sge.sw_zone_info[zidx].size, region1,
			sc->sge.hw_buf_info[hwidx].size, region3));
	}

	fl->cll_def.zidx = zidx;
	fl->cll_def.hwidx = hwidx;
	fl->cll_def.region1 = region1;
	fl->cll_def.region3 = region3;
}

static void
find_safe_refill_source(struct adapter *sc, struct sge_fl *fl)
{
	struct sge *s = &sc->sge;
	struct hw_buf_info *hwb;
	struct sw_zone_info *swz;
	int spare;
	int8_t hwidx;

	if (fl->flags & FL_BUF_PACKING)
		hwidx = s->safe_hwidx2;	/* with room for metadata */
	else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) {
		hwidx = s->safe_hwidx2;
		hwb = &s->hw_buf_info[hwidx];
		swz = &s->sw_zone_info[hwb->zidx];
		spare = swz->size - hwb->size;

		/* no good if there isn't room for an mbuf as well */
		if (spare < CL_METADATA_SIZE + MSIZE)
			hwidx = s->safe_hwidx1;
	} else
		hwidx = s->safe_hwidx1;

	if (hwidx == -1) {
		/* No fallback source */
		fl->cll_alt.hwidx = -1;
		fl->cll_alt.zidx = -1;

		return;
	}

	hwb = &s->hw_buf_info[hwidx];
	swz = &s->sw_zone_info[hwb->zidx];
	spare = swz->size - hwb->size;
	fl->cll_alt.hwidx = hwidx;
	fl->cll_alt.zidx = hwb->zidx;
	if (allow_mbufs_in_cluster &&
	    (fl_pad == 0 || (MSIZE % sc->params.sge.pad_boundary) == 0))
		fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) *
		    MSIZE;
	else
		fl->cll_alt.region1 = 0;
	fl->cll_alt.region3 = spare - fl->cll_alt.region1;
}
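/*
 * [Editor's illustration, hedged] The layout chosen above splits a receive
 * cluster into region1 (inline mbufs), the hardware payload buffer, and
 * region3 (the spare area, which must still hold the cluster metadata);
 * the KASSERTs check region1 + payload + region3 == cluster size.  Worked
 * numbers with assumed sizes:
 */
#if 0	/* illustrative userland sketch, not driver code */
#include <stdio.h>

/* assumed example sizes; the real values depend on the zone and hwidx */
#define CLUSTER_SIZE	4096	/* uma zone buffer size */
#define PAYLOAD_SIZE	3584	/* hardware rx buffer size (assumed) */
#define MSIZE_EX	256	/* sizeof(struct mbuf) (assumed) */
#define CL_META_EX	32	/* cluster_metadata footprint (assumed) */

int
main(void)
{
	int spare = CLUSTER_SIZE - PAYLOAD_SIZE;
	/* as many inline mbufs as fit while leaving room for metadata */
	int n = (spare - CL_META_EX) / MSIZE_EX;
	int region1 = n * MSIZE_EX;	/* inline mbufs */
	int region3 = spare - region1;	/* leftover, incl. metadata */

	/* the invariant checked by the driver's KASSERT */
	printf("region1=%d payload=%d region3=%d sum=%d\n",
	    region1, PAYLOAD_SIZE, region3,
	    region1 + PAYLOAD_SIZE + region3);
	return (0);
}
#endif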
static void
add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
{
	mtx_lock(&sc->sfl_lock);
	FL_LOCK(fl);
	if ((fl->flags & FL_DOOMED) == 0) {
		fl->flags |= FL_STARVING;
		TAILQ_INSERT_TAIL(&sc->sfl, fl, link);
		callout_reset(&sc->sfl_callout, hz / 5, refill_sfl, sc);
	}
	FL_UNLOCK(fl);
	mtx_unlock(&sc->sfl_lock);
}

static void
handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq)
{
	struct sge_wrq *wrq = (void *)eq;

	atomic_readandclear_int(&eq->equiq);
	taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task);
}

static void
handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq)
{
	struct sge_txq *txq = (void *)eq;

	MPASS((eq->flags & EQ_TYPEMASK) == EQ_ETH);

	atomic_readandclear_int(&eq->equiq);
	mp_ring_check_drainage(txq->r, 0);
	taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task);
}

static int
handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	const struct cpl_sge_egr_update *cpl = (const void *)(rss + 1);
	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
	struct adapter *sc = iq->adapter;
	struct sge *s = &sc->sge;
	struct sge_eq *eq;
	static void (*h[])(struct adapter *, struct sge_eq *) = {NULL,
	    &handle_wrq_egr_update, &handle_eth_egr_update,
	    &handle_wrq_egr_update};

	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
	    rss->opcode));

	eq = s->eqmap[qid - s->eq_start - s->eq_base];
	(*h[eq->flags & EQ_TYPEMASK])(sc, eq);

	return (0);
}

/* handle_fw_msg works for both fw4_msg and fw6_msg because this is valid */
CTASSERT(offsetof(struct cpl_fw4_msg, data) == \
    offsetof(struct cpl_fw6_msg, data));

static int
handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);

	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
	    rss->opcode));

	if (cpl->type == FW_TYPE_RSSCPL || cpl->type == FW6_TYPE_RSSCPL) {
		const struct rss_header *rss2;

		rss2 = (const struct rss_header *)&cpl->data[0];
		return (t4_cpl_handler[rss2->opcode](iq, rss2, m));
	}

	return (t4_fw_msg_handler[cpl->type](sc, &cpl->data[0]));
}

/**
 *	t4_handle_wrerr_rpl - process a FW work request error message
 *	@adap: the adapter
 *	@rpl: start of the FW message
 */
static int
t4_handle_wrerr_rpl(struct adapter *adap, const __be64 *rpl)
{
	u8 opcode = *(const u8 *)rpl;
	const struct fw_error_cmd *e = (const void *)rpl;
	unsigned int i;

	if (opcode != FW_ERROR_CMD) {
		log(LOG_ERR,
		    "%s: Received WRERR_RPL message with opcode %#x\n",
		    device_get_nameunit(adap->dev), opcode);
		return (EINVAL);
	}
	log(LOG_ERR, "%s: FW_ERROR (%s) ", device_get_nameunit(adap->dev),
	    G_FW_ERROR_CMD_FATAL(be32toh(e->op_to_type)) ? "fatal" :
	    "non-fatal");
	switch (G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type))) {
	case FW_ERROR_TYPE_EXCEPTION:
		log(LOG_ERR, "exception info:\n");
		for (i = 0; i < nitems(e->u.exception.info); i++)
			log(LOG_ERR, "%s%08x", i == 0 ? "\t" : " ",
			    be32toh(e->u.exception.info[i]));
		log(LOG_ERR, "\n");
		break;
	case FW_ERROR_TYPE_HWMODULE:
		log(LOG_ERR, "HW module regaddr %08x regval %08x\n",
		    be32toh(e->u.hwmodule.regaddr),
		    be32toh(e->u.hwmodule.regval));
		break;
	case FW_ERROR_TYPE_WR:
		log(LOG_ERR, "WR cidx %d PF %d VF %d eqid %d hdr:\n",
		    be16toh(e->u.wr.cidx),
		    G_FW_ERROR_CMD_PFN(be16toh(e->u.wr.pfn_vfn)),
		    G_FW_ERROR_CMD_VFN(be16toh(e->u.wr.pfn_vfn)),
		    be32toh(e->u.wr.eqid));
		for (i = 0; i < nitems(e->u.wr.wrhdr); i++)
			log(LOG_ERR, "%s%02x", i == 0 ? "\t" : " ",
			    e->u.wr.wrhdr[i]);
		log(LOG_ERR, "\n");
		break;
	case FW_ERROR_TYPE_ACL:
		log(LOG_ERR, "ACL cidx %d PF %d VF %d eqid %d %s",
		    be16toh(e->u.acl.cidx),
		    G_FW_ERROR_CMD_PFN(be16toh(e->u.acl.pfn_vfn)),
		    G_FW_ERROR_CMD_VFN(be16toh(e->u.acl.pfn_vfn)),
		    be32toh(e->u.acl.eqid),
		    G_FW_ERROR_CMD_MV(be16toh(e->u.acl.mv_pkd)) ? "vlanid" :
		    "MAC");
		for (i = 0; i < nitems(e->u.acl.val); i++)
			log(LOG_ERR, " %02x", e->u.acl.val[i]);
		log(LOG_ERR, "\n");
		break;
	default:
		log(LOG_ERR, "type %#x\n",
		    G_FW_ERROR_CMD_TYPE(be32toh(e->op_to_type)));
		return (EINVAL);
	}

	return (0);
}

static int
sysctl_uint16(SYSCTL_HANDLER_ARGS)
{
	uint16_t *id = arg1;
	int i = *id;

	return sysctl_handle_int(oidp, &i, 0, req);
}

static int
sysctl_bufsizes(SYSCTL_HANDLER_ARGS)
{
	struct sge *s = arg1;
	struct hw_buf_info *hwb = &s->hw_buf_info[0];
	struct sw_zone_info *swz = &s->sw_zone_info[0];
	int i, rc;
	struct sbuf sb;
	char c;

	sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
	for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) {
		if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster)
			c = '*';
		else
			c = '\0';

		sbuf_printf(&sb, "%u%c ", hwb->size, c);
	}
	sbuf_trim(&sb);
	sbuf_finish(&sb);

	rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);

	sbuf_delete(&sb);
	return (rc);
}

#ifdef RATELIMIT
/*
 * len16 for a txpkt WR with a GL.  Includes the firmware work request header.
 */
static inline u_int
txpkt_eo_len16(u_int nsegs, u_int immhdrs, u_int tso)
{
	u_int n;

	MPASS(immhdrs > 0);

	n = roundup2(sizeof(struct fw_eth_tx_eo_wr) +
	    sizeof(struct cpl_tx_pkt_core) + immhdrs, 16);

	if (__predict_false(nsegs == 0))
		goto done;

	nsegs--; /* first segment is part of ulptx_sgl */
	n += sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
	if (tso)
		n += sizeof(struct cpl_tx_pkt_lso_core);

done:
	return (howmany(n, 16));
}

#define ETID_FLOWC_NPARAMS 6
#define ETID_FLOWC_LEN (roundup2((sizeof(struct fw_flowc_wr) + \
    ETID_FLOWC_NPARAMS * sizeof(struct fw_flowc_mnemval)), 16))
#define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16))

static int
send_etid_flowc_wr(struct cxgbe_snd_tag *cst, struct port_info *pi,
    struct vi_info *vi)
{
	struct wrq_cookie cookie;
	u_int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;
	struct fw_flowc_wr *flowc;

	mtx_assert(&cst->lock, MA_OWNED);
	MPASS((cst->flags & (EO_FLOWC_PENDING | EO_FLOWC_RPL_PENDING)) ==
	    EO_FLOWC_PENDING);

	flowc = start_wrq_wr(cst->eo_txq, ETID_FLOWC_LEN16, &cookie);
	if (__predict_false(flowc == NULL))
		return (ENOMEM);

	bzero(flowc, ETID_FLOWC_LEN);
	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(ETID_FLOWC_NPARAMS) | V_FW_WR_COMPL(0));
	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(ETID_FLOWC_LEN16) |
	    V_FW_WR_FLOWID(cst->etid));
	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
	flowc->mnemval[0].val = htobe32(pfvf);
	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
	flowc->mnemval[1].val = htobe32(pi->tx_chan);
	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
	flowc->mnemval[2].val = htobe32(pi->tx_chan);
	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
	flowc->mnemval[3].val = htobe32(cst->iqid);
	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_EOSTATE;
	flowc->mnemval[4].val = htobe32(FW_FLOWC_MNEM_EOSTATE_ESTABLISHED);
	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
	flowc->mnemval[5].val = htobe32(cst->schedcl);

	commit_wrq_wr(cst->eo_txq, flowc, &cookie);

	cst->flags &= ~EO_FLOWC_PENDING;
	cst->flags |= EO_FLOWC_RPL_PENDING;
	MPASS(cst->tx_credits >= ETID_FLOWC_LEN16);	/* flowc is first WR. */
	cst->tx_credits -= ETID_FLOWC_LEN16;

	return (0);
}

#define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16))

void
send_etid_flush_wr(struct cxgbe_snd_tag *cst)
{
	struct fw_flowc_wr *flowc;
	struct wrq_cookie cookie;

	mtx_assert(&cst->lock, MA_OWNED);

	flowc = start_wrq_wr(cst->eo_txq, ETID_FLUSH_LEN16, &cookie);
	if (__predict_false(flowc == NULL))
		CXGBE_UNIMPLEMENTED(__func__);

	bzero(flowc, ETID_FLUSH_LEN16 * 16);
	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(0) | F_FW_WR_COMPL);
	flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLUSH_LEN16) |
	    V_FW_WR_FLOWID(cst->etid));

	commit_wrq_wr(cst->eo_txq, flowc, &cookie);

	cst->flags |= EO_FLUSH_RPL_PENDING;
	MPASS(cst->tx_credits >= ETID_FLUSH_LEN16);
	cst->tx_credits -= ETID_FLUSH_LEN16;
	cst->ncompl++;
}

static void
write_ethofld_wr(struct cxgbe_snd_tag *cst, struct fw_eth_tx_eo_wr *wr,
    struct mbuf *m0, int compl)
{
	struct cpl_tx_pkt_core *cpl;
	uint64_t ctrl1;
	uint32_t ctrl;	/* used in many unrelated places */
	int len16, pktlen, nsegs, immhdrs;
	caddr_t dst;
	uintptr_t p;
	struct ulptx_sgl *usgl;
	struct sglist sg;
	struct sglist_seg segs[38];	/* XXX: find real limit.  XXX: get off the stack */

	mtx_assert(&cst->lock, MA_OWNED);
	M_ASSERTPKTHDR(m0);
	KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
	    m0->m_pkthdr.l4hlen > 0,
	    ("%s: ethofld mbuf %p is missing header lengths", __func__, m0));

	if (needs_udp_csum(m0)) {
		CXGBE_UNIMPLEMENTED("UDP ethofld");
	}

	len16 = mbuf_eo_len16(m0);
	nsegs = mbuf_eo_nsegs(m0);
	pktlen = m0->m_pkthdr.len;
	ctrl = sizeof(struct cpl_tx_pkt_core);
	if (needs_tso(m0))
		ctrl += sizeof(struct cpl_tx_pkt_lso_core);
	immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen +
	    m0->m_pkthdr.l4hlen;
	ctrl += immhdrs;

	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) |
	    V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl));
	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) |
	    V_FW_WR_FLOWID(cst->etid));
	wr->r3 = 0;
	wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
	wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen;
	wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen);
	wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen;
	wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0);
	wr->u.tcpseg.r4 = 0;
	wr->u.tcpseg.r5 = 0;
	wr->u.tcpseg.plen = htobe32(pktlen - immhdrs);

	if (needs_tso(m0)) {
		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);

		wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz);

		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
		    F_LSO_LAST_SLICE |
		    V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
		    V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
		if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header))
			ctrl |= V_LSO_ETHHDR_LEN(1);
		if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
			ctrl |= F_LSO_IPV6;

		lso->lso_ctrl = htobe32(ctrl);
		lso->ipid_ofst = htobe16(0);
		lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
		lso->seqno_offset = htobe32(0);
		lso->len = htobe32(pktlen);

		cpl = (void *)(lso + 1);
	} else {
		wr->u.tcpseg.mss = htobe16(0xffff);
		cpl = (void *)(wr + 1);
	}

	/* Checksum offload must be requested for ethofld. */
	ctrl1 = 0;
	MPASS(needs_l4_csum(m0));

	/* VLAN tag insertion */
	if (needs_vlan_insertion(m0)) {
		ctrl1 |= F_TXPKT_VLAN_VLD |
		    V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
	}

	/* CPL header */
	cpl->ctrl0 = cst->ctrl0;
	cpl->pack = 0;
	cpl->len = htobe16(pktlen);
	cpl->ctrl1 = htobe64(ctrl1);

	/* Copy Ethernet, IP & TCP hdrs as immediate data */
	p = (uintptr_t)(cpl + 1);
	m_copydata(m0, 0, immhdrs, (void *)p);

	/* SGL */
	dst = (void *)(cpl + 1);
	if (nsegs > 0) {
		int i, pad;

		/* zero-pad upto next 16Byte boundary, if not 16Byte aligned */
		p += immhdrs;
		pad = 16 - (immhdrs & 0xf);
		bzero((void *)p, pad);

		usgl = (void *)(p + pad);
		usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
		    V_ULPTX_NSGE(nsegs));

		sglist_init(&sg, nitems(segs), segs);
		for (; m0 != NULL; m0 = m0->m_next) {
			if (__predict_false(m0->m_len == 0))
				continue;
			if (immhdrs >= m0->m_len) {
				immhdrs -= m0->m_len;
				continue;
			}

			sglist_append(&sg, mtod(m0, char *) + immhdrs,
			    m0->m_len - immhdrs);
			immhdrs = 0;
		}
		MPASS(sg.sg_nseg == nsegs);

		/*
		 * Zero pad last 8B in case the WR doesn't end on a 16B
		 * boundary.
		 */
		*(uint64_t *)((char *)wr + len16 * 16 - 8) = 0;

		usgl->len0 = htobe32(segs[0].ss_len);
		usgl->addr0 = htobe64(segs[0].ss_paddr);
		for (i = 0; i < nsegs - 1; i++) {
			usgl->sge[i / 2].len[i & 1] =
			    htobe32(segs[i + 1].ss_len);
			usgl->sge[i / 2].addr[i & 1] =
			    htobe64(segs[i + 1].ss_paddr);
		}
		if (i & 1)
			usgl->sge[i / 2].len[1] = htobe32(0);
	}
}
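/*
 * [Editor's illustration, hedged] write_ethofld_wr() above copies the
 * Ethernet/IP/TCP headers as immediate data and then starts the SGL at
 * the next 16-byte boundary, zero-filling the gap with
 * pad = 16 - (immhdrs & 0xf).  The alignment arithmetic in isolation:
 */
#if 0	/* illustrative userland sketch, not driver code */
#include <stdio.h>

int
main(void)
{
	int immhdrs, pad;

	/* typical header totals: 14 or 18 (L2) + 20 or 40 (L3) + 20 (L4) */
	for (immhdrs = 54; immhdrs <= 74; immhdrs += 4) {
		pad = 16 - (immhdrs & 0xf);
		printf("immhdrs=%d pad=%d sgl offset=%d\n",
		    immhdrs, pad, immhdrs + pad);
	}
	return (0);
}
#endif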
static void
ethofld_tx(struct cxgbe_snd_tag *cst)
{
	struct mbuf *m;
	struct wrq_cookie cookie;
	int next_credits, compl;
	struct fw_eth_tx_eo_wr *wr;

	mtx_assert(&cst->lock, MA_OWNED);

	while ((m = mbufq_first(&cst->pending_tx)) != NULL) {
		M_ASSERTPKTHDR(m);

		/* How many len16 credits do we need to send this mbuf. */
		next_credits = mbuf_eo_len16(m);
		MPASS(next_credits > 0);
		if (next_credits > cst->tx_credits) {
			/*
			 * Tx will make progress eventually because there is at
			 * least one outstanding fw4_ack that will return
			 * credits and kick the tx.
			 */
			MPASS(cst->ncompl > 0);
			return;
		}
		wr = start_wrq_wr(cst->eo_txq, next_credits, &cookie);
		if (__predict_false(wr == NULL)) {
			/* XXX: wishful thinking, not a real assertion. */
			MPASS(cst->ncompl > 0);
			return;
		}
		cst->tx_credits -= next_credits;
		cst->tx_nocompl += next_credits;
		compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2;
		ETHER_BPF_MTAP(cst->com.ifp, m);
		write_ethofld_wr(cst, wr, m, compl);
		commit_wrq_wr(cst->eo_txq, wr, &cookie);
		if (compl) {
			cst->ncompl++;
			cst->tx_nocompl = 0;
		}
		(void) mbufq_dequeue(&cst->pending_tx);
		mbufq_enqueue(&cst->pending_fwack, m);
	}
}

int
ethofld_transmit(struct ifnet *ifp, struct mbuf *m0)
{
	struct cxgbe_snd_tag *cst;
	int rc;

	MPASS(m0->m_nextpkt == NULL);
	MPASS(m0->m_pkthdr.snd_tag != NULL);
	cst = mst_to_cst(m0->m_pkthdr.snd_tag);

	mtx_lock(&cst->lock);
	MPASS(cst->flags & EO_SND_TAG_REF);

	if (__predict_false(cst->flags & EO_FLOWC_PENDING)) {
		struct vi_info *vi = ifp->if_softc;
		struct port_info *pi = vi->pi;
		struct adapter *sc = pi->adapter;
		const uint32_t rss_mask = vi->rss_size - 1;
		uint32_t rss_hash;

		cst->eo_txq = &sc->sge.ofld_txq[vi->first_ofld_txq];
		if (M_HASHTYPE_ISHASH(m0))
			rss_hash = m0->m_pkthdr.flowid;
		else
			rss_hash = arc4random();
		/* We assume RSS hashing */
		cst->iqid = vi->rss[rss_hash & rss_mask];
		cst->eo_txq += rss_hash % vi->nofldtxq;
		rc = send_etid_flowc_wr(cst, pi, vi);
		if (rc != 0)
			goto done;
	}

	if (__predict_false(cst->plen + m0->m_pkthdr.len > eo_max_backlog)) {
		rc = ENOBUFS;
		goto done;
	}

	mbufq_enqueue(&cst->pending_tx, m0);
	cst->plen += m0->m_pkthdr.len;
	ethofld_tx(cst);
	rc = 0;
done:
	mtx_unlock(&cst->lock);
	if (__predict_false(rc != 0))
		m_freem(m0);
	return (rc);
}
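/*
 * [Editor's illustration, hedged] ethofld_tx() above and ethofld_fw4_ack()
 * below run a credit loop: every WR costs its len16 in credits, a
 * completion is requested when none is outstanding or once half of
 * tx_total has been sent uncompleted, and each fw4_ack hands credits back
 * and restarts tx.  A toy model of that accounting (all names and numbers
 * are stand-ins, not the driver's values):
 */
#if 0	/* illustrative userland sketch, not driver code */
#include <stdio.h>

struct etag_sim {
	int tx_credits;	/* len16 credits we may still submit */
	int tx_total;	/* size of the credit pool */
	int tx_nocompl;	/* credits sent since the last completion request */
	int ncompl;	/* completions outstanding */
};

/* Send one WR costing len16 credits; 0 means we must wait for an ack. */
static int
send_wr(struct etag_sim *t, int len16)
{
	if (len16 > t->tx_credits)
		return (0);	/* stalled until fw4_ack returns credits */
	t->tx_credits -= len16;
	t->tx_nocompl += len16;
	/* request a completion if none is outstanding or half the pool is out */
	if (t->ncompl == 0 || t->tx_nocompl >= t->tx_total / 2) {
		t->ncompl++;
		t->tx_nocompl = 0;
	}
	return (1);
}

static void
fw4_ack(struct etag_sim *t, int credits)
{
	t->ncompl--;
	t->tx_credits += credits;	/* the ack hands credits back */
}

int
main(void)
{
	struct etag_sim t = { 64, 64, 0, 0 };
	int i;

	for (i = 0; i < 10; i++)
		if (!send_wr(&t, 12)) {
			printf("stalled at WR %d, credits=%d\n",
			    i, t.tx_credits);
			fw4_ack(&t, 60);	/* arbitrary returned amount */
		}
	printf("credits=%d ncompl=%d\n", t.tx_credits, t.ncompl);
	return (0);
}
#endif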
static int
ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m0)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
	struct mbuf *m;
	u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
	struct cxgbe_snd_tag *cst;
	uint8_t credits = cpl->credits;

	cst = lookup_etid(sc, etid);
	mtx_lock(&cst->lock);
	if (__predict_false(cst->flags & EO_FLOWC_RPL_PENDING)) {
		MPASS(credits >= ETID_FLOWC_LEN16);
		credits -= ETID_FLOWC_LEN16;
		cst->flags &= ~EO_FLOWC_RPL_PENDING;
	}

	KASSERT(cst->ncompl > 0,
	    ("%s: etid %u (%p) wasn't expecting completion.",
	    __func__, etid, cst));
	cst->ncompl--;

	while (credits > 0) {
		m = mbufq_dequeue(&cst->pending_fwack);
		if (__predict_false(m == NULL)) {
			/*
			 * The remaining credits are for the final flush that
			 * was issued when the tag was freed by the kernel.
			 */
			MPASS((cst->flags &
			    (EO_FLUSH_RPL_PENDING | EO_SND_TAG_REF)) ==
			    EO_FLUSH_RPL_PENDING);
			MPASS(credits == ETID_FLUSH_LEN16);
			MPASS(cst->tx_credits + cpl->credits == cst->tx_total);
			MPASS(cst->ncompl == 0);

			cst->flags &= ~EO_FLUSH_RPL_PENDING;
			cst->tx_credits += cpl->credits;
freetag:
			cxgbe_snd_tag_free_locked(cst);
			return (0);	/* cst is gone. */
		}
		KASSERT(m != NULL,
		    ("%s: too many credits (%u, %u)", __func__, cpl->credits,
		    credits));
		KASSERT(credits >= mbuf_eo_len16(m),
		    ("%s: too few credits (%u, %u, %u)", __func__,
		    cpl->credits, credits, mbuf_eo_len16(m)));
		credits -= mbuf_eo_len16(m);
		cst->plen -= m->m_pkthdr.len;
		m_freem(m);
	}

	cst->tx_credits += cpl->credits;
	MPASS(cst->tx_credits <= cst->tx_total);

	m = mbufq_first(&cst->pending_tx);
	if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m))
		ethofld_tx(cst);

	if (__predict_false((cst->flags & EO_SND_TAG_REF) == 0) &&
	    cst->ncompl == 0) {
		if (cst->tx_credits == cst->tx_total)
			goto freetag;
		else {
			MPASS((cst->flags & EO_FLUSH_RPL_PENDING) == 0);
			send_etid_flush_wr(cst);
		}
	}

	mtx_unlock(&cst->lock);

	return (0);
}
#endif
Index: projects/clang700-import/sys/dev/efidev/efirt.c
===================================================================
--- projects/clang700-import/sys/dev/efidev/efirt.c	(revision 337615)
+++ projects/clang700-import/sys/dev/efidev/efirt.c	(revision 337616)

Property changes on: projects/clang700-import/sys/dev/efidev/efirt.c
___________________________________________________________________
Deleted: svn:mergeinfo
## -0,110 +0,0 ##
71218-271222,271226-271228,271230,271241,271250,271253,271255-271256,271258-271259,271261,271282-271287,271299,271307-271311,271313,271315-271317,271319-271321,271328,271331,271336,271338,271349-271354,271358,271360,271362,271365-271366,271381-271382,271385,271389,271393-271395,271397-271398,271401,271403,271405,271407-271411,271422,271424,271429,271432-271434,271436-271437,271439,271443,271445-271452,271457-271459,271463-271468,271475,271480,271482-271483,271485,271487,271490-271493,271495-271496,271503,271505-271507,271524,271526-271528,271532,271534-271536,271539,271543,271545-271546,271549-271550,271552-271553,271560,271563,271567,271571,271577-271579,271586,271588-271589,271591,271594-271595,271597,271601,271603-271604,271606-271607,271609-271610,271614,271616-271617,271624,271628,271630,271635,271643-271645,271647-271649,271651,271663-271665,271670,271672-271674,271676-271680,271682,271684,271688-271689,271692,271695-271696,271700,271702,271705,271711,271716-271722,271726-271728,271731,271743,271745,271747,271754,271756,271758-271759,271761-271762,271781,271784-271785,271787-271789,271794,271796-271797,271802,271819,271834,271839,271845,271847,271854,271864,271868-271869,271871-271876,271879,271881-271882,271888-271893,271895,271899-271900,271906-271907,271909-271910,271913,271917-271919,271921,271924,271926-271927,271930-271931,271934,271936,271940-271942,271945-271946,271949,271951,271953-271954,271957-271959,271965,271970-271972,271974-271975,271977-271978,271980,271982,271990,271992,272000,272007,272022-272023,272025,272027-272028,272033,272036-272037,272040,272043-272044,272046,272049,272051-272053,272055-272057,272059,272069-272072,272076,272081,272083-272084,272086-272087,272102-272103,272105-272107,272109,272111,272127,272130,272132,272135,272137-272143,272153,272159,272161,272165,272168,272171,272173-272174,272176,272181-272183,272190,272193,272197-272198,272209,272217,272223-272224,272243,272245,272247-272248,272250,272252-272256,272258-272259,272263,272270,272273-272274,272278,272280-272282,272284,272288-272291,272294,272296,272300,272305,272308,272315,272323-272326,272328-272331,272333-272334,272343,272347-272349,272355-272356,272358-272359,272363,272377-272379,272383-272387,272389,272393-272395,272398-272399,272401-272406,272408-272411,272414,272416,272422,272441,272443-272446,272449,272455,272457-272458,272469-272471,272474-272475,272479-272481,272483-272485,272487,272490-272492,272502-272507,272509-272512,272519,272523,272527-272528,272533-272534,272536-272538,272547-272548,272550-272555,272562,272566-272567,272569,272571,272573-272575,272578-272579,272583-272584,272595,272597-272599,272601-272602,272605-272606,272613,272649-272650,272655,272658-272659,272666,272668,272670-272671,272678-272679,272683,272701,272706,272708,272710,272713,272715,272717-272721,272729-272735,272737-272744,272746-272751,272756-272757,272761-272763,272765,272769-272772,272777-272789,272796-272797,272799-272801,272805-272807,272809-272810,272812,272814-272815,272822,272830,272833,272836,272838-272839,272841-272845,272848,272878,272884-272886,272889-272891,272893,272897,272901-272903,272905-272911,272914-272915,272917,272931-272932,272935-272939,272943,272947,272949-272950,272952,272958,272962-272963,272974,272976,272978-272980,273003-273006,273008,273010-273012,273014-273015,273017-273026,273029,273034,273038,273040,273045-273048,273050-273051,273053,273060-273064,273066-273068,273072-273073,273075,273081,273087-273091,273093,273096,273102,273107-273108,273112,273114,273118,273121,273123-273124,
273127,273129-273132,273135,273143-273144,273146,273158-273160,273163-273164,273168,273178,273181,273186,273201,273204,273209-273212,273214,273216,273218-273219,273235-273236,273243,273247-273248,273252,273256-273259,273261,273263-273264,273267,273273,273279-273285,273287-273288,273293,273298,273301,273328-273332,273337-273338,273342,273351-273353,273356,273359-273360,273370-273371,273375-273378,273381-273382,273389-273391,273393,273395-273397,273402,273405,273407,273410,273423,273434,273445,273448,273452,273455,273457,273459,273464-273468,273470,273480,273482,273486,273488-273489,273498,273507,273514-273517,273520-273530,273533-273541,273543-273544,273546,273548,273551,273561,273566,273569,273572,273574,273577-273579,273583-273585,273587,273590-273593,273598-273599,273602,273606-273607,273610,273613,273615,273619,273627-273628,273634-273635,273640-273641,273644,273647,273653,273666,273683,273687,273690,273693,273701,273703-273704,273706,273708,273710-273711,273718,273727-273728,273730-273731,273733,273737-273740,273744,273747-273750,273752-273754,273756,273760,273762,273766,273768-273774,273778-273780,273782-273788,273791,273793,273796-273797,273799-273800,273803,273809-273810,273813,273816,273820-273822,273834,273837,273848-273857,273861-273863,273866-273867,273871,273873,273876,273896,273902,273904,273911,273913-273914,273917-273918,273920,273922,273925-273929,273932-273938,273942-273943,273945-273952,273955,273962-273967,273969,273973,273985-273986,273988-273989,273991-273992,273995,273999,274011,274016-274017,274020-274023,274031,274033,274036,274038,274040,274045,274049-274055,274057,274060-274063,274065-274068,274072-274081,274083-274084,274088,274090-274092,274095,274100,274116-274117,274119-274121,274123-274124,274142-274146,274154,274159,274163-274164,274172,274188-274189,274192-274193,274203,274205-274206,274209,274215,274218,274223,274226-274228,274230,274246,274248-274249,274251,274253-274254,274267,274270,274276-274278,274286,274289,274294,274303-274304,274308-274310,274316,274322,274325,274328,274330,274333,274337,274343,274347,274349,274351,274364-274366,274370,274376-274379,274383,274398,274402,274407,274409-274416,274418,274434-274435,274437-274439,274442,274451-274456,274458,274460-274462,274465-274467,274473-274475,274477-274478,274483-274484,274487,274489-274490,274495-274496,274501-274503,274523,274532,274537-274538,274542,274545,274549,274555-274556,274559,274569,274571-274582,274587,274592-274593,274595-274605,274607,274618-274619,274621,274626-274630,274632-274633,274636,274638-274639,274641-274644,274653,274663,274670-274671,274673-274675,274681,274703,274708-274709,274711,274720,274722-274724,274727,274737-274738,274741-274742,274744,274750,274756,274766,274784-274786,274789-274792,274795-274797,274804-274806,274816-274822,274839-274841,274843,274845-274847,274851-274854,274856-274859,274878,274884,274898,274900,274918,274922,274924,274926,274931,274933,274936-274937,274940-274941,274954,274960,274962,274964-274965,274967,274976,274988,275003,275009,275011,275020,275032-275033,275035,275045-275046,275058,275060,275071,275098,275101,275109-275110,275112,275118,275120-275121,275123-275124,275137,275140,275162-275163,275170,275199,275205-275209,275256,275260,275264,275298,275321-275322,275328,275330,275335-275337,275347,275358-275359,275365-275366,275368,275374,275376,275378,275380,275385-275386,275390,275392-275394,275399,275401,275403-275405,275412,275415-275416,275418,275420,275435,275437-275438,275446-275447,275452,275455,275458-275459,275461,275473-275478,275481
-275483,275503,275510,275512-275513,275515,275518-275524,275530-275531,275539,275552-275554,275557,275560-275564,275567-275568,275574-275576,275579,275581-275584,275588-275589,275593-275595,275599,275605-275606,275612,275614-275620,275622,275624,275633,275636-275639,275645-275646,275651,275653,275656-275660,275665-275666,275680-275683,275686-275687,275692,275698,275709,275721,275727,275729,275733-275740,275743-275748,275752-275753,275755-275756,275759-275760,275765,275768,275772,275779-275782,275790-275791,275800,275805-275806,275808,275811-275812,275816-275817,275819-275821,275829,275833,275838,275842,275846-275847,275850-275852,275857,275864-275871,275874,275897,275903,275905-275908,275918,275920,275922-275923,275925,275930,275940-275944,275946,275949-275954,275958-275963,275965,275967,276003-276004,276007-276009,276012-276013,276016,276019,276021,276026-276027,276029,276037,276045-276047,276049,276052-276053,276063,276065-276066,276069,276071,276079,276087,276095,276098,276101,276106,276123,276127,276141,276145-276146,276148,276161-276162,276165-276166,276168,276174,276176,276187,276190-276194,276196-276198,276200,276202-276204,276206,276212-276213,276215,276218,276221,276226,276228-276229,276238-276240,276247,276249-276250,276282,276296-276299,276303-276304,276306,276309,276313-276314,276318-276323,276333-276336,276340,276344-276347,276350-276351,276359,276377,276383,276392,276394-276397,276400,276402,276404,276407,276412,276417,276419,276426,276428,276430,276432,276439,276444-276446,276450,276462,276469-276470,276472,276480,276483,276485,276489,276491,276495,276498,276508-276509,276511-276512,276517-276519,276521-276523,276525,276532,276534,276539,276550,276564,276570-276571,276574,276577,276589-276590,276596-276599,276605,276607,276611-276612,276626-276627,276630,276632,276636,276638,276642,276644-276645,276654,276666,276669,276671,276681,276695,276699,276702-276704,276714,276717,276723-276724,276728-276729,276749,276751,276758,276763-276766,276771,276774-276775,276795,276798-276799,276801,276803-276806,276808,276814-276815,276820,276822-276823,276825,276827-276828,276831-276835,276839-276840,276842,276844,276846-276848,276861,276863,276867,276879-276880,276883,276891-276893,276898,276901-276902,276904,276906-276907,276913-276914,276948-276949,276952,276959,276962,276981-276983,276985,277023,277025-277028,277030-277034,277037-277038,277042-277044,277046-277051,277053,277055,277057,277082,277084-277085,277088,277096,277098-277102,277126,277128,277130,277132,277135-277136,277143,277147,277149,277151,277158,277168-277172,277175,277177,277179,277185,277199,277202,277205-277208,277211-277212,277215-277219,277225-277227,277229-277230,277233-277237,277239,277245-277247,277258-277259,277262,277265,277270,277272,277274,277278,277291,277295,277300-277301,277305-277307,277309-277310,277318,277321-277322,277328,277333,277337,277340,277346-277352,277354-277355,277357-277360,277365,277372,277380,277385,277390-277391,277396,277416-277419,277424,277433,277440,277450,277452-277454,277458,277460,277463-277467,277469-277481,277485,277487-277488,277492,277501,277503-277504,277512-277516,277523,277527,277529,277531-277533,277536,277541,277555,277568,277579,277594,277606,277608-277610,277626-277627,277629,277637,277642-277647,277649-277650,277652,277655-277656,277659,277663,277666,277674-277678,277685-277687,277693,277695,277706,277709-277710,277712-277714,277717,277725-277728,277730-277736,277738-277741,277743,277758-277759,277764,277788,277792-277796,277799,277802,277806,277811,277814-277815,277826-27782
9,277834,277836-277839,277841,277853-277855,277862,277868-277869,277877,277883-277895,277898-277899,277901,277903,277907,277914-277915,277917,277920-277922,277925-277927,277936,277938-277939,277944,277948-277949,277951-277953,277957-277959,277962,277969-277970,277972,277974,277988-277989,278000-278001,278004,278010,278016,278031-278032,278034,278037-278038,278040,278047,278053,278061,278070-278071,278074,278098-278099,278101,278103,278105,278111,278114,278118-278119,278135-278137,278145-278148,278151-278154,278159-278161,278166-278167,278172-278173,278182,278192-278193,278202,278204,278206,278209,278212-278213,278220-278222,278228,278233-278234,278237,278239,278248-278251,278254-278255,278257,278268,278282,278292,278297,278300,278302-278303,278311,278313-278314,278316,278320-278323,278325,278327,278335-278340,278342,278348,278352,278354,278360,278362,278364,278370-278372,278374,278379,278398,278402,278433,278438,278449,278458-278462,278464-278470,278472,278474-278477,278483,278485,278488-278493,278500,278502-278503,278518-278519,278521,278523,278526,278530,278532,278551,278582,278584,278586,278594,278598,278600,278603,278605-278607,278618-278619,278621-278623,278625,278627,278633,278636,278640,278653,278655,278658,278672,278681-278682,278697,278704,278706,278728,278739,278742,278751,278760-278761,278767,278770,278776,278790-278791,278794-278795,278802-278803,278806,278810,278817-278819,278826-278828,278830-278844,278848-278850,278856-278858,278860,278865-278866,278868,278870-278871,278874,278888-278889,278891,278902,278905,278915,278924-278926,278932-278933,278937-278942,278963-278964,278970,278976,278983-278984,279016-279017,279033,279037,279046-279048,279050,279076-279078,279080-279081,279083-279084,279089,279091-279092,279094-279098,279114,279117,279121,279123,279125-279128,279139,279141-279147,279172-279183,279186,279197-279198,279205-279206,279209-279210,279215,279219-279220,279223,279225,279227-279233,279235-279236,279239-279246,279248-279251,279253,279256-279257,279261,279266,279268,279270,279275-279278,279281-279284,279297,279301,279307,279310-279312,279314-279318,279320-279321,279324,279326,279330,279336,279338,279346,279351,279359-279362,279364,279366-279368,279375,279385,279390,279393,279395-279396,279398,279400-279401,279410-279411,279413,279421-279440,279444,279489,279491,279493,279531,279534,279536,279538,279540,279543-279544,279551,279554,279559,279563-279564,279567-279568,279570-279571,279573,279584,279587-279591,279593,279597-279600,279624,279642,279651-279652,279654,279657-279658,279673,279675-279676,279683-279684,279691,279695,279698,279700-279702,279706,279720,279722-279726,279728-279730,279735,279738,279756-279758,279760,279764,279766,279773,279776,279779,279802,279806-279808,279810-279813,279815-279816,279819,279821,279824-279827,279837,279841-279843,279845-279846,279850-279851,279854,279856,279858-279860,279862-279865,279867,279869,279875,279886,279892,279894,279896,279901-279903,279910,279913-279916,279919-279920,279925,279927,279929,279931,279935-279936,279941,279945,279949-279955,279957,279959-279960,279963,279965,279967-279969,279975-279977,279979-279980,279987,279996,280004,280017,280026,280037,280040-280044,280047,280090,280121,280125-280126,280130,280133-280134,280154,280160-280164,280166,280169,280172,280177-280180,280182-280183,280187,280191,280194-280197,280204,280206,280208,280210-280211,280221-280222,280228,280230-280232,280234-280236,280238,280249,280252-280254,280262-280264,280278-280279,280286,280293,280297,280299,280301,280307-280308,280310-280311,2803
21-280323,280325,280330-280331,280336,280342,280345,280347,280351,280357,280360,280371,280374-280380,280382-280385,280387-280388,280393,280402-280404,280429-280435,280439-280440,280447,280451,280459,280463,280475,280495,280572-280574,280597-280598,280630,280634,280640,280642,280685-280688,280700,280702,280709,280713-280714,280716,280721,280725,280756-280758,280760,280763-280765,280767-280768,280772,280775,280780-280782,280786,280792-280793,280797,280805,280807-280808,280814,280816,280818-280819,280822,280830,280834,280840,280845-280846,280848-280850,280858,280861,280864,280866,280870,280878-280879,280881-280882,280884,280893-280895,280904,280914-280916,280919-280926,280928-280931,280933-280939,280949-280950,280953-280957,280959,280962,280968,280974-280976,280980-280981,280983,280988,280990-280991,280998-280999,281002-281003,281005-281006,281009,281011,281015-281016,281026-281027,281039,281053,281058-281060,281070-281071,281073-281074,281077,281081-281082,281084,281086,281094,281107-281109,281112-281113,281116-281117,281120-281121,281136,281138,281145-281146,281159-281160,281162-281167,281169-281171,281176-281182,281199-281200,281202,281206,281209,281216,281225,281234,281236,281254,281265-281266,281271-281272,281275,281280-281283,281285,281307-281311,281316,281320,281324,281331,281335,281337,281354-281356,281358-281362,281367,281371-281372,281380-281381,281383,281387,281391-281404,281406-281412,281440-281441,281451,281462,281470,281475,281483,281495,281499-281500,281502,281524,281529,281531-281532,281536-281537,281540,281542,281544,281548-281550,281558-281559,281561-281563,281581-281582,281591-281593,281599,281601,281605,281611-281612,281616-281618,281626,281628,281630,281655,281666-281667,281670,281674,281677,281689,281696,281698,281700-281701,281703-281704,281712,281723,281726,281728,281734-281736,281738,281745,281751-281752,281756,281762-281769,281772-281773,281775,281777,281780-281783,281785,281787,281789,281795,281800,281809,281812,281820,281823,281825,281828,281832,281838,281840,281857,281860,281870-281873,281875,281877,281879,281881,281883-281884,281887,281915-281916,281918,281920,281923-281924,281928-281930,281932,281941,281944,281946,281956,281959-281960,281962,281966-281967,281984,281987,282017,282023-282025,282041-282042,282054,282056-282057,282059,282061-282063,282067,282071-282076,282084-282089,282092,282097,282104,282106,282109,282112,282115-282116,282119-282122,282125-282128,282130-282131,282133-282138,282144,282148,282152,282201,282205-282206,282208-282209,282211-282213,282215,282236,282238,282241,282244-282245,282247,282254,282257,282259,282261,282266,282269,282273,282277,282281,282284,282287,282289-282291,282293,282296,282299-282301,282304,282312,282328,282331,282335-282336,282339,282341,282344-282345,282351,282364,282407-282408,282415-282419,282424,282429,282434-282436,282443,282465,282469,282473,282475,282482,282485,282500,282505,282516,282519-282520,282524-282533,282536,282550-282552,282558,282560,282563,282565,282567,282571,282574,282577-282578,282594-282595,282608,282613,282632,282641,282643,282645-282646,282650-282652,282658,282660-282661,282679-282681,282683,282685-282687,282690,282693,282697-282700,282706,282708-282709,282712-282713,282716,282718-282721,282727,282730-282731,282739-282741,282743,282747,282766,282772,282784-282785,282787-282799,282805,282809-282810,282817,282821,282857,282863,282865-282866,282880-282881,282885,282888,282897-282905,282908,282914,282916,282921-282922,282932,282940-282942,282944,282948,282965,282967,282978,282996-282998,283000-283001,283
007-283009,283013-283014,283018,283023,283025,283032-283033,283035,283048-283051,283053,283056,283061-283064,283066-283067,283069,283075,283092-283093,283101-283106,283110,283114-283115,283117,283119,283121,283123,283126,283128,283141,283144,283146-283147,283149,283153,283162-283163,283168,283170,283245,283252,283254-283257,283261,283264-283266,283268-283269,283271-283274,283278,283281-283282,283285-283286,283288,283290,283293,283295,283298-283299,283301-283302,283307-283308,283313,283320,283330,283357,283364,283369-283386,283388-283403,283405-283408,283410-283425,283427-283456,283459-283468,283470-283476,283478-283480,283482-283484,283486-283498,283502,283506,283514-283515,283524-283525,283542,283544,283546-283547,283562,283569,283573-283578,283580,283593,283599-283602,283604,283612-283613,283618,283622,283624,283629-283630,283635,283645,283647-283648,283650,283654,283657-283658,283661-283662,283664-283666,283673-283674,283679-283681,283691-283692,283695,283735,283745,283753,283784-283786,283795,283801,283806,283808-283811,283813-283820,283832,283836,283840-283843,283845,283858,283863-283864,283869-283870,283889,283891,283893-283897,283913,283919-283920,283922-283924,283929,283933,283936,283939,283959,283961-283962,283966,283968,283973,283975,283988,283991-283992,284000,284007,284010-284013,284022-284025,284032,284044,284046,284049,284051,284102,284104-284107,284110-284114,284117-284130,284133,284135,284137,284139-284140,284148-284149,284151,284157,284159,284163,284165-284167,284174-284175,284179-284180,284192,284207,284222,284229,284237,284245,284247-284250,284254,284260,284264,284269-284270,284277,284280,284283,284289-284290,284296-284297,284301,284303-284304,284306,284308-284309,284323-284326,284329,284331-284333,284335,284346,284348,284351-284355,284377-284378,284383-284386,284392-284393,284405,284408-284409,284416,284423,284425-284428,284436,284445,284447,284470,284477,284495,284512-284513,284515,284526-284528,284531,284539-284542,284547,284551-284552,284567,284578,284582,284589,284591,284593-284594,284596,284600-284601,284604,284607-284609,284611-284612,284617,284622,284626-284627,284630,284636,284639-284641,284644,284646,284649,284654-284656,284658,284660,284672,284676,284681-284683,284688,284691,284697-284698,284709,284712,284717-284719,284722,284724,284727-284728,284730,284739,284741,284743,284746-284749,284765,284777,284779-284780,284792,284808,284811-284812,284814,284858,284863-284864,284872-284873,284875,284877,284882-284884,284887,284889,284893,284895-284897,284913-284916,284918-284921,284927-284931,284933,284941-284942,284956,284965,284968-284969,284984,284988,284996,285005-285006,285010-285011,285020-285021,285028-285030,285037,285039,285041-285042,285046,285050-285051,285053,285059,285064,285066-285067,285086,285088-285089,285100,285104,285113,285118,285133-285134,285136-285138,285140-285141,285146-285147,285154-285158,285169-285170,285173,285188,285190,285204,285217-285221,285225-285226,285229-285231,285237,285240,285246,285248,285251-285253,285256,285260-285261,285269,285279,285282,285284,285318,285325,285329,285339-285340,285384,285394-285396,285398,285401,285403,285405-285406,285408-285409,285411-285415,285418,285420,285424,285426-285428,285430,285433-285435,285440-285444,285457,285459,285481-285483,285510,285512-285513,285522,285527-285529,285531,285543,285552-285554,285557,285590-285592,285594,285600,285621,285623,285628,285630,285638-285639,285642-285644,285648,285651,285657-285658,285663-285664,285667,285669,285671-285676,285678-285679,285684,285701,285710,285715,28
5719-285720,285722,285730,285732-285733,285735,285742,285767-285768,285773,285775-285776,285782-285783,285785,285792,285796,285798,285815-285816,285837-285844,285847,285858-285859,285869-285870,285873,285875,285877-285879,285881-285882,285886-285889,285891,285909,285912-285914,285925,285932,285935,285938,285944-285948,285958,285960,285972-285973,285975,285984-285985,285989,285996-285997,285999,286010,286017,286024,286043,286045-286047,286066,286074,286086,286090,286092,286102,286106-286107,286118,286131,286139-286140,286142,286150-286152,286154-286158,286162-286163,286167,286169,286173,286177,286196-286204,286206,286208,286210-286211,286215,286217-286218,286223,286226,286228,286234,286237-286238,286243,286256,286258-286260,286265,286281,286283,286285,286288-286289,286293,286304,286317,286320-286321,286328,286330-286331,286338,286341,286344-286345,286353,286358,286360-286361,286367-286370,286375,286378,286380-286383,286385,286388-286389,286395,286400,286402,286404-286406,286409,286414,286417-286419,286423,286429,286447-286448,286451,286456,286461-286462,286469,286490-286491,286503-286506,286510,286512-286516,286519,286539,286541,286543,286545,286547,286549,286551,286554,286556,286561-286562,286569-286570,286574-286576,286578-286579,286582,286587,286589,286591,286593,286595-286596,286598,286600-286601,286603,286605,286613,286615,286617,286621-286623,286625-286626,286628,286641,286647,286649,286652,286655,286660,286667,286677,286683,286686,286689,286699-286702,286705,286708,286710,286712,286719-286720,286723,286733,286737,286762-286764,286766-286767,286770,286773-286778,286780-286781,286784-286785,286790,286798-286799,286802,286805-286809,286811,286814,286816,286822,286827,286829-286834,286836-286838,286848-286849,286857,286860,286862,286867-286868,286873,286886-286888,286890-286894,286910,286913-286914,286926,286933,286937-286940,286942-286944,286947,286951,286962-286967,286970-286971,286974-286975,286981-286983,286985-286986,286991,286993,286999,287004-287005,287009,287012,287020-287021,287025,287033-287034,287081,287093-287095,287097-287100,287103,287107,287109,287112,287117,287119-287121,287123,287125,287138,287143,287148,287151-287153,287155,287178-287179,287182-287183,287208,287216,287220-287221,287234-287238,287247,287264-287265,287280-287284,287289,287292-287294,287299-287300,287309-287310,287317,287319-287321,287324,287327,287330,287335-287337,287340,287345,287349,287354-287355,287357-287358,287360-287361,287366,287368-287369,287372,287374,287376,287378-287386,287389-287390,287395-287397,287400,287402-287406,287413-287414,287420-287422,287429,287432-287433,287436-287437,287440,287442,287444-287445,287448,287453-287459,287465,287468,287473,287475,287479,287483,287485,287489,287493-287494,287497,287499-287500,287520,287522,287528,287534-287535,287537-287538,287541,287563-287564,287567,287574-287576,287579-287581,287590-287592,287595,287599,287607-287611,287613-287621,287633-287635,287649-287654,287664,287669-287671,287673-287675,287683,287685-287698,287701-287707,287711-287712,287714-287715,287717-287721,287724-287726,287728,287744-287745,287747-287748,287753-287754,287756-287758,287760,287764-287768,287770-287772,287774-287775,287778,287783-287785,287799,287803,287806-287807,287816,287818-287819,287821,287823,287825,287827-287828,287830-287831,287833,287842-287843,287855,287860,287864,287866,287868,287870,287875,287880,287886,287912-287913,287917-287918,287920-287921,287927,287930,287933-287935,287937,287940,287944,287951,287955-287957,287964,287967-287968,287978-287983,287986,287988,2
87991-287994,287997-287998,288000-288001,288006,288020-288021,288025,288031-288033,288043-288044,288057,288059,288061,288064,288067-288068,288070-288071,288075,288081,288083,288091-288092,288099,288104,288110-288111,288116,288120,288138,288143,288146,288148,288153-288154,288158,288160,288165-288166,288170,288175,288179-288180,288198-288201,288204,288208,288211,288213-288217,288220-288221,288224,288229-288230,288233,288238-288239,288246-288249,288258-288262,288264,288266-288267,288271-288273,288276,288278,288281,288295,288298,288303-288306,288309-288310,288330,288335-288337,288339-288341,288345,288347-288348,288358-288359,288361-288372,288374,288380-288381,288390-288391,288405-288406,288419-288420,288423-288424,288427,288429-288430,288446-288450,288452,288454-288456,288458,288470,288477,288486,288488,288490,288522,288524,288528-288529,288575,288579,288600,288625-288626,288653,288678,288826,288829,288832,288834,288902-288903,288905-288907,288910,288914,288944,288949-288950,288952-288953,288959-288963,288981,288984,288993-288994,288997,289001,289017,289026,289028-289031,289038,289041,289055,289058,289060,289063,289065,289067,289080,289083-289084,289091,289093,289095,289097-289098,289102,289104-289105,289109-289111,289113,289118,289136,289138,289146,289151-289158,289180,289186,289190-289192,289194-289195,289199,289203,289205-289209,289219,289225,289229,289231-289234,289238,289240,289255,289257-289259,289265-289266,289271-289274,289280-289281,289286,289289,289293,289295,289297,289299-289300,289305,289307,289309,289313,289315-289316,289321-289322,289324,289332,289337,289340-289349,289360-289362,289374-289375,289378-289379,289393,289396-289397,289407,289419-289422,289425,289430,289441,289445-289446,289450-289453,289469,289477,289480,289487-289488,289496-289497,289499-289500,289527-289528,289531,289536,289538-289546,289560,289562-289563,289570,289577,289592,289596-289598,289600-289602,289604-289605,289607-289620,289622,289626,289634-289637,289643,289645-289654,289656-289658,289660-289661,289664,289669,289676-289677,289681,289687,289693,289702,289704,289719,289726-289727,289732-289733,289736,289739,289743,289746,289755,289760,289764,289768-289769,289774-289777,289783,289790,289793-289794,289797,289812,289817,289819,289822-289824,289837-289838,289842-289843,289852,289855,289863,289866-289867,289870-289875,289877-289879,289881-289882,289886,289888,289890,289895-289897,289899,289901-289913,289916,289930-289933,289937,289939-289940,289942,289979-289980,289982-289983,290003-290004,290006-290008,290018,290020-290021,290023-290024,290028,290042,290047,290054,290073,290083-290084,290087,290102,290104,290110,290116,290118,290121-290122,290129-290132,290138-290140,290143-290144,290147,290153,290158-290161,290163-290164,290169-290170,290173-290174,290177-290182,290184,290188,290190-290191,290195,290229-290232,290236,290251,290253,290255,290259-290260,290262,290264-290268,290270,290275,290316,290320,290326,290329,290336-290337,290367,290370,290374,290387,290399-290401,290404-290406,290408,290412,290414-290416,290426,290428-290429,290431,290435,290437,290440-290442,290444,290450-290452,290458,290462,290466,290468,290480,290489,290492,290504,290506-290507,290515,290521,290532,290537-290540,290542,290548-290550,290560-290561,290563,290566-290567,290571-290573,290601,290615,290639,290641,290645-290647,290650,290659-290660,290662,290665,290670-290674,290678-290689,290693,290708-290710,290725,290728-290729,290742-290743,290810-290813,290820,290830,290843-290851,290855-290856,290860,290868-290871,290903,290905,290907-
290909,290911-290915,290917,290920,290922,290929-290930,290946,290948,290959,290970,290978,290980-290981,290993-290994,291000-291001,291004,291012-291014,291016-291017,291022,291024,291026,291028-291035,291038,291047,291061,291067-291072,291078,291080-291081,291084-291085,291088,291091-291092,291097,291099,291114,291117,291120-291121,291125-291126,291132,291137-291138,291140-291141,291143-291147,291149-291150,291155-291164,291166-291169,291172,291180-291181,291188,291197-291199,291207,291209,291221,291225-291226,291238,291260-291261,291263,291265-291266,291280,291296,291301,291306,291329-291331,291338-291340,291342,291348-291349,291358-291359,291362-291365,291367,291369,291375-291377,291379-291380,291383,291390-291398,291408,291410,291432,291434,291436,291446,291453,291459-291461,291481,291488,291527,291534-291536,291545,291569-291570,291576,291578-291579,291582,291584-291588,291590,291605,291610-291611,291615,291637-291638,291641,291651,291653-291654,291657-291659,291671,291677-291680,291682,291684,291690-291691,291693-291694,291699-291700,291703,291705-291706,291716,291724,291727,291730,291741-291743,291746-291747,291752-291753,291766,291770,291793,291821,291826,291832-291839,291843,291845-291849,291862,291868,291872,291876,291891-291892,291896,291904,291908,291911,291919,291922-291928,291931-291932,291936,291938-291939,291941-291942,291947-291948,291950,291953-291954,291957,291978-291985,292000,292003-292005,292007-292013,292031-292032,292034,292038,292041-292042,292044,292046-292048,292050-292053,292055,292057-292060,292066,292069,292074,292086,292088,292090,292092-292093,292106,292121-292123,292128-292130,292135,292153,292206,292212,292216,292218,292226-292229,292234,292249-292250,292254,292258,292262-292263,292266,292277,292289-292290,292313,292316-292319,292323-292324,292327-292328,292330,292332,292337-292338,292355,292360,292366,292384,292394,292408-292411,292413,292419,292432-292435,292441-292446,292454-292455,292485,292489,292491-292493,292495-292497,292500-292501,292504,292507-292510,292513-292515,292517-292518,292522-292523,292527,292530-292533,292537-292539,292541-292546,292550,292552-292554,292558,292563,292569-292570,292573,292578,292581-292585,292595,292601-292608,292610,292620-292621,292638-292641,292647,292650-292651,292653-292654,292658,292661,292665,292669-292670,292674,292676,292682,292690,292697,292705,292710,292715,292719,292725,292733-292734,292739,292741,292743,292745,292749,292751-292752,292757,292759,292764-292765,292813-292816,292818-292820,292822,292829,292834-292838,292840,292846-292847,292849,292859,292861,292872,292877-292878,292884,292888,292890-292892,292894,292896,292899,292914,292943,292946-292950,292953,292957,292960-292961,292983,292989,292996,292999-293001,293014-293015,293028-293029,293032-293034,293037,293042-293043,293045-293049,293053,293055,293059,293061,293063-293065,293073,293105,293165,293173,293188,293190,293192,293221,293229,293231,293233-293234,293244-293245,293268-293269,293271,293274,293281,293305,293312,293331-293332,293334,293338,293340,293342-293343,293346,293349-293350,293357,293370,293379,293390,293414,293422-293423,293434,293436-293438,293440-293444,293452,293454,293458-293461,293469,293491,293612-293613,293617,293621-293622,293627,293653,293658-293659,293677,293679,293683,293704-293705,293708,293715,293719-293722,293724,293730-293734,293740,293745,293748-293758,293761-293770,293772-293775,293781,293783,293792,293796,293805-293815,293817-293819,293821,293825,293828,293830-293831,293833,293835,293851,293854-293856,293858,293860,293868
-293871,293873-293875,293877-293878,293880,293887-293892,293895,293899-293902,293905,293913,293977,294027,294029,294032,294040-294042,294048,294057-294060,294062,294068,294072-294073,294075-294081,294091-294094,294102-294103,294123,294125-294128,294137,294183,294191,294196,294200-294201,294233-294235,294237,294249-294257,294259,294265,294284,294291,294309-294314,294317-294320,294322,294324-294326,294328,294330,294332-294333,294335-294336,294347,294358,294362,294366-294367,294370-294373,294407,294414,294452,294463-294464,294466-294467,294469-294470,294493-294498,294504,294506-294507,294514-294515,294520,294526,294530,294548-294549,294553-294554,294556-294557,294563,294565,294567,294578,294594-294598,294620-294621,294625,294652-294653,294655,294669,294684,294688,294694-294695,294700-294703,294705,294732,294734-294735,294749,294753,294765-294769,294773,294788,294794-294795,294797,294799,294801,294803,294805,294807,294809,294811,294813,294815,294817,294820,294832,294840,294847,294854,294860-294863,294873,294876-294878,294884,294886,294891-294894,294899-294900,294909,294915,294922-294926,294933,294935-294936,294949,294952-294953,294957,294965,294967-294968,294973,294995,295006,295012,295017,295021-295022,295026-295027,295029-295030,295032,295047,295051,295069-295070,295072,295074-295075,295077,295079,295093-295094,295116-295119,295121,295125,295133-295134,295139,295147,295159,295161,295174,295186,295202,295209,295234,295273,295276-295277,295295-295309,295320,295323-295324,295341,295345,295352,295356-295357,295384-295385,295391,295407-295408,295416-295419,295455,295463,295465,295467,295471,295477,295486-295489,295495-295497,295532-295533,295535-295536,295549,295562,295568,295574-295576,295578-295580,295583-295584,295588,295603-295606,295608,295616,295618,295632,295636-295637,295649,295651,295665,295668,295670-295672,295675,295677-295678,295708-295710,295717,295729-295730,295732,295737,295740-295749,295768,295771-295773,295792-295794,295800,295805,295810-295811,295822-295823,295830,295836,295844,295856,295861,295875-295877,295900-295901,295906,295914,295916,295918-295919,295923-295925,295928-295930,295944,295958,295964,295966-295967,295969,295976-295977,295980,295994-295995,295998,296000-296002,296009,296012,296014,296020-296022,296024-296025,296028,296063,296071,296076,296083-296089,296095,296122,296148,296178,296180-296181,296184,296187-296188,296252-296253,296278,296285-296286,296289-296291,296293,296296-296297,296299-296300,296305,296319-296320,296322,296342,296348,296379-296381,296388,296392,296394,296416,296419,296428,296449,296467,296470,296472-296473,296479,296501-296502,296510,296512,296514,296516,296519,296521,296523-296526,296528,296530,296533,296535,296537,296539,296541-296543,296546,296554,296557,296563,296567,296575,296579,296585,296593-296595,296610,296613,296615,296617,296623,296633-296634,296643,296651-296657,296673-296674,296707,296766,296769,296776,296799,296807,296816,296819,296899,296902,296908-296910,296914,296919,296922,296926,296932,296934,296956,296984,296987-296988,297023,297037,297039,297049,297051-297052,297060-297063,297070,297072,297137,297139,297142-297143,297147-297151,297155-297161,297172,297176-297183,297187,297196,297200-297203,297206,297212,297219-297221,297229,297232,297243,297255-297256,297265,297282,297296-297298,297308-297309,297311,297323-297326,297330,297334-297335,297337,297358,297360,297363-297364,297367,297370,297374,297386-297387,297391,297397-297398,297420-297422,297444,297456,297475-297476,297479,297484,297488,297507-297509,297513,297519-297522,29752
5,297527,297535,297557,297571,297618,297626,297634-297637,297672-297673,297678,297685,297688,297695-297696,297709,297741,297746,297751,297763,297771,297781,297791,297796,297800-297805,297807-297812,297815,297817-297818,297820,297827,297832,297836-297838,297841,297846,297848,297854,297856-297859,297867-297869,297871,297873,297884,297908,297912-297913,297915,297921,297925-297926,297931-297933,297942,297947,297963,297966-297968,297984-297986,297991,297999,298004-298005,298008,298012-298014,298017,298022,298024,298030-298031,298038-298043,298072,298088,298101,298103,298105-298106,298111-298112,298114,298131,298156,298173,298176,298192,298196,298212,298224,298259-298260,298270,298279-298280,298294-298295,298301,298303-298304,298311-298312,298318-298319,298328,298333-298334,298336-298340,298355,298366,298368,298370,298372,298377,298379-298380,298385,298402,298408,298420,298424,298439,298446,298448-298452,298458,298462-298464,298472-298473,298482-298484,298495-298496,298507,298516,298518-298525,298530,298551,298556,298561-298566,298568-298572,298574-298575,298584-298585,298589,298591,298597,298609,298613-298618,298620,298640,298644,298655-298656,298660,298664-298665,298668-298672,298676-298680,298683,298688-298690,298693-298696,298698,298703,298730-298732,298734-298737,298744,298750,298753,298755,298758-298759,298769,298771-298772,298783,298787,298809,298817,298819,298829,298831,298839,298842,298844-298845,298864,298868,298874,298881-298883,298885,298887-298888,298890-298891,298893,298898,298901,298904,298921-298922,298931,298950,298977,298981-298984,298987-298989,298996,299004,299015,299035,299060,299077,299085,299087,299089,299114-299115,299121,299162-299163,299196,299201,299205,299211,299213,299226,299234,299238,299242,299249-299254,299263,299265-299272,299274-299276,299310,299315,299318-299340,299344-299348,299350,299371,299373,299375,299377,299387,299401-299408,299410-299413,299460-299461,299465-299466,299484,299489-299491,299494-299496,299502-299508,299510-299511,299513-299515,299517-299518,299524-299525,299529,299539,299559,299573,299576-299581,299585,299589,299591-299612,299654-299655,299657,299659,299666-299672,299681,299691,299694,299699,299701,299710-299712,299718-299724,299726-299734,299736,299738,299753,299759-299767,299769-299770,299774,299778,299783,299802-299803,299805-299808,299810-299811,299814-299817,299829,299831-299834,299839-299841,299843-299844,299865-299869,299872-299873,299879-299880,299888-299890,299892-299894,299896-299909,299911-299927,299938,299940,299945-299949,299951-299953,299955,299957,299971,299977,299986,299988,299991,300002,300005,300007-300011,300024,300027,300030,300052,300058,300072,300082-300084,300089-300090,300100-300102,300105,300107-300108,300111-300112,300120-300124,300126-300127,300129-300135,300142,300145,300157,300167,300169,300177,300179-300180,300217-300218,300220,300222-300224,300231,300240,300253,300258-300260,300277,300280,300282,300293,300296,300301,300304-300305,300307,300319,300322,300327,300332-300333,300340-300341,300356,300359-300360,300364-300365,300376,300378,300385-300389,300395,300397,300411-300416,300420,300428-300432,300442,300455,300478-300481,300486-300487,300489,300501,300505,300508,300531,300547,300564-300565,300567-300574,300576,300596,300605-300610,300620-300622,300624-300625,300632-300633,300635-300636,300638,300642-300655,300660,300662,300664-300667,300670,300683-300684,300691-300692,300705-300706,300708,300714,300739,300747,300756,300758,300775,300779,300781,300783-300784,300792-300793,300809,300825,300827,300830-300832,3008
34,300836,300840-300842,300844-300849,300851,300856-300858,300861-300862,300867,300870,300874,300884,300890-300893,300931-300932,300934,300936-300939,300941-300942,300945,300947,300949,300953,300956,300959,300965,300967,300970,300972-300973,300981,300983,300985,300987-300989,300992-300994,300998,301007,301009,301015,301017-301022,301039,301053,301061,301065,301067,301069,301075-301076,301090,301101-301103,301105-301106,301109,301113,301115,301122,301125,301127,301130,301138-301139,301162,301180,301203,301206,301210,301212,301235,301237,301247,301273,301275,301278-301279,301291-301293,301295-301297,301300,301308-301309,301404,301406,301414,301427,301448,301457,301460,301465,301483-301484,301487-301488,301495,301513,301522,301532,301544-301545,301549-301550,301570,301574,301580,301582-301584,301588,301592,301596,301604-301605,301607,301683,301704,301707-301708,301710,301724,301737,301745,301749,301752,301758,301760,301764,301773,301775,301800,301842,301851,301853,301870-301871,301873-301874,301877,301928-301929,301956-301957,301959-301960,301962-301964,301966-301969,301974,302013-302014,302019-302020,302031,302036,302063,302075-302076,302080,302091,302123,302125,302139,302141,302151,302171,302174-302175,302179,302181,302196,302206,302210,302213,302221-302222,302236,302265,302278,302294,302296,302305-302306,302317,302326-302327,302330,302335-302336,302338,302346,302362-302365,302371,302373,302379,302382,302384,302402,302459-302460,302476,302482-302484,302486-302497,302499,302504,302507-302508,302510-302511,302515-302518,302520,302529-302531,302534,302541-302542,302550-302553,302558,302567,302573,302580,302605,302614,302622,302626,302635,302666-302668,302673,302770-302772,302778,302797,302824,302826-302827,302834-302840,302845,302855-302857,302861,302893,302899-302900,302902,302908,302911,302919,302921,302932-302933,302936,302943-302944,302946-302947,302957,302973,302999-303001,303003-303004,303009-303011,303013-303014,303031,303059,303074,303076,303084,303086,303088,303090,303101,303109-303110,303142,303147,303151,303155,303160,303208,303210-303211,303217,303225,303244,303266,303287,303338,303393,303399-303400,303423-303426,303429,303437,303461,303494,303503,303510,303514,303516,303530,303536,303551,303553-303554,303561,303564-303565,303573,303576,303638,303663,303676,303702,303704-303706,303710,303713,303715,303720,303737-303738,303760,303763,303765-303766,303773,303782,303791,303794-303795,303799-303800,303804,303806,303830,303855,303869-303870,303889,303891-303894,303897,303913-303914,303916,303919,303929-303935,303937,303942,303964,303971,303982,304008-304009,304011-304012,304018,304033-304034,304040,304055,304075,304149,304152,304162,304174,304184,304202,304227-304229,304231-304232,304238,304247-304249,304251,304286,304342,304440,304476,304487,304520-304521,304571,304597,304601,304607-304608,304629,304637,304641,304652,304674,304693-304694,304698,304703,304721,304747,304751,304755,304779-304782,304802,304808-304812,304819,304825,304874,304911,304928,304931,304953,304959,304964,304969,304989,305012,305023-305024,305033,305041,305077,305085-305086,305090,305123,305129,305133,305140,305144,305154,305170,305177,305219,305241,305269,305284,305344-305345,305356-305357,305360,305365,305388-305389,305406,305409,305412-305413,305421-305422,305430,305469,305536,305590,305679,305744,305939 Reverse-merged /projects/clang350-import/sys/dev/efidev/efirt.c:r274961-275126,275128-275133,275135-276476 Reverse-merged /vendor/resolver/dist/sys/dev/efidev/efirt.c:r1540-186085 Reverse-merged 
/projects/clang-sparc64/sys/dev/efidev/efirt.c:r262258-262612 Reverse-merged /projects/clang360-import/sys/dev/efidev/efirt.c:r277327-280030 Reverse-merged /user/ngie/more-tests/sys/amd64/amd64/efirt.c:r281427-281428,281430,281432,281450,281460,281464-281465,281485,281489-281491,281515,281519,281589,281593-281597,281619,284388,288316,288321-288327,288422,288476,288478-288481,288483,288578,288650-288651,288655-288656,288659-288661,288663,288673-288676,288680,288828,288930-288932 Reverse-merged /user/ngie/more-tests2/sys/ia64/ia64/efi.c:r288935-288940,288942-289179,289223-289224,289226-289227,289230,289236,289325,289437,289440,289484-289486,290904 Reverse-merged /projects/clang370-import/sys/dev/efidev/efirt.c:r287506-288928 Reverse-merged /projects/pms/sys/dev/efidev/efirt.c:r285199-285661 Reverse-merged /projects/clang380-import/sys/dev/efidev/efirt.c:r292913-296412 Reverse-merged /projects/largeSMP/sys/amd64/amd64/efirt.c:r221273-222812,222815-223757 Reverse-merged /projects/clang390-import/sys/dev/efidev/efirt.c:r303250-309123 Reverse-merged /projects/clang391-import/sys/dev/efidev/efirt.c:r309166-310192 Reverse-merged /projects/building-blocks/sys/amd64/amd64/efirt.c:r275142-275143,275198,275297,275306-275307,275309,275311,275556,275558,275600,277445,277670,277673 Reverse-merged /user/ngie/make_check/sys/dev/efidev/efirt.c:r291879-295379 Reverse-merged /projects/lldb-r201577/sys/dev/efidev/efirt.c:r262185-262527 Reverse-merged /projects/collation/sys/dev/efidev/efirt.c:r286424-290491 Reverse-merged /projects/release-arm-redux/sys/ia64/ia64/efi.c:r278203,278595-278597,278610,280643-280650,280652,280655,282539-282546,282548,282553-282557,282564,282566,282570,282573,282587-282593,282596-282607,282615-282616,282624-282629,282631,282633,282635-282640,282642,282647-282648,282653-282654,282656-282657,282659,282662-282667,282682,282691 Reverse-merged /projects/release-arm-redux/sys/dev/efidev/efirt.c:r278203,278595-278597,278610,280643-280650,280652,280655,282539-282546,282548,282553-282557,282564,282566,282570,282573,282587-282593,282596-282607,282615-282616,282624-282629,282631,282633,282635-282640,282642,282647-282648,282653-282654,282656-282657,282659,282662-282667,282682,282691 Reverse-merged /projects/release-arm64/sys/dev/efidev/efirt.c:r281786,281788,281792 Reverse-merged /projects/elftoolchain/sys/dev/efidev/efirt.c:r260687-261245 Reverse-merged /user/ngie/bug203673/sys/dev/efidev/efirt.c:r289470-289489 Reverse-merged /projects/lldb-r201577/sys/amd64/amd64/efirt.c:r262185-262527 Reverse-merged /projects/clang370-import/sys/amd64/amd64/efirt.c:r287506-288928 Reverse-merged /projects/ipfw/sys/dev/efidev/efirt.c:r267383-272837 Reverse-merged /projects/quota64/sys/amd64/amd64/efirt.c:r184125-207707 Reverse-merged /projects/clang-sparc64/sys/amd64/amd64/efirt.c:r262258-262612 Reverse-merged /user/ngie/release-pkg-fix-tests/sys/dev/efidev/efirt.c:r298865-299093 Reverse-merged /user/ngie/more-tests2/sys/dev/efidev/efirt.c:r288935-289179,289223-289224,289226-289227,289230,289236,289325,289437,289440,289478,289484-289486,290904,290921 Reverse-merged /projects/elftoolchain-update-r3130/sys/dev/efidev/efirt.c:r276164,276167,276170-276172 Reverse-merged /user/ngie/make_check/sys/amd64/amd64/efirt.c:r291879-295379 Reverse-merged /user/delphij/zfs-arc-rebase/sys/amd64/amd64/efirt.c:r281754 Reverse-merged /projects/lldb-r201577/sys/ia64/ia64/efi.c:r262185-262527 Reverse-merged /projects/collation/sys/amd64/amd64/efirt.c:r286424-290491 Reverse-merged 
/projects/release-arm64/sys/amd64/amd64/efirt.c:r281786,281788,281792 Reverse-merged /user/ngie/stable-10-fix-LINT-NOINET/sys/ia64/ia64/efi.c:r293623-293724 Reverse-merged /projects/release-pkg/sys/amd64/amd64/efirt.c:r274131-298104 Reverse-merged /projects/bsd_rdma_4_9/sys/dev/efidev/efirt.c:r325505-326168 Reverse-merged /projects/head_mfi/sys/dev/efidev/efirt.c:r233621 Reverse-merged /projects/clang-sparc64/sys/ia64/ia64/efi.c:r262258-262612 Reverse-merged /projects/elftoolchain-update-r3130/sys/amd64/amd64/efirt.c:r276164,276167,276170-276172 Reverse-merged /projects/netbsd-tests-update-12/sys/dev/efidev/efirt.c:r303985-305318 Reverse-merged /projects/contrib-netbsd-update-12/sys/amd64/amd64/efirt.c:r303899-303984 Reverse-merged /projects/random_number_generator/sys/dev/efidev/efirt.c:r254613-256243 Reverse-merged /head/sys/dev/efidev/efirt.c:r319973-323558,336870-337585 Reverse-merged /projects/mpsutil/sys/dev/efidev/efirt.c:r286179-290100 Reverse-merged /projects/clang500-import/sys/amd64/amd64/efirt.c:r316992-321352 Reverse-merged /user/ngie/more-tests/sys/ia64/ia64/efi.c:r281427-281428,281430,281432,281450,281460,281464-281465,281485,281489-281491,281515,281519,281589,281593-281597,281619,284388,288316,288321-288327,288422,288476,288478-288481,288483,288578,288650-288651,288655-288656,288659-288661,288663,288673-288676,288680,288828,288930-288931 Reverse-merged /projects/bsnmp-improved-ipv6-support/sys/amd64/amd64/efirt.c:r301868 Reverse-merged /projects/pms/sys/amd64/amd64/efirt.c:r285199-285661 Reverse-merged /projects/building-blocks/sys/ia64/ia64/efi.c:r275142-275143,275306-275307,275556,275558,277445,277670,277673 Reverse-merged /projects/clang380-import/sys/amd64/amd64/efirt.c:r292913-296412 Reverse-merged /projects/release-pkg/sys/dev/efidev/efirt.c:r274131-298104 Reverse-merged /user/ngie/stable-10-libnv/sys/ia64/ia64/efi.c:r292587-292857,293093,303608 Reverse-merged /projects/netbsd-tests-update-12/sys/ia64/ia64/efi.c:r304237 Reverse-merged /projects/cxl_iscsi/sys/amd64/amd64/efirt.c:r291227-291228,292618 Reverse-merged /projects/random_number_generator/sys/amd64/amd64/efirt.c:r254613-256243 Reverse-merged /user/ngie/more-tests/sys/dev/efidev/efirt.c:r281427-281428,281430,281432,281450,281460,281464-281465,281485,281489-281491,281515,281519,281589,281593-281597,281619,284388,288316,288321-288327,288422,288476,288478-288481,288483,288578,288650-288651,288655-288656,288659-288661,288663,288673-288676,288680,288828,288930-288932 Reverse-merged /user/ngie/socket-tests/sys/dev/efidev/efirt.c:r293882-293885,294103,294117,294119-294120,294245-294247,294488,294555,294643-294644 Reverse-merged /projects/clang-trunk/sys/dev/efidev/efirt.c:r283596-287505 Reverse-merged /user/ngie/release-pkg-fix-tests/sys/amd64/amd64/efirt.c:r298865-299093 Reverse-merged /projects/ipfw/sys/amd64/amd64/efirt.c:r267383-272837 Reverse-merged /projects/building-blocks/sys/dev/efidev/efirt.c:r275142-275143,275198,275297,275306-275307,275309,275311,275556,275558,275600,277445,277670,277673 Reverse-merged /vendor/device-tree/dist/sys/dev/efidev/efirt.c:r303380 Reverse-merged /projects/release-embedded/sys/dev/efidev/efirt.c:r262314,262504,262510-262511,262580,262660,262662,262700,262713,262774,262786-262788,262790-262792,262798,262802,262808 Reverse-merged /projects/zfsd/head/sys/dev/efidev/efirt.c:r266519,269993 Reverse-merged /projects/largeSMP/sys/dev/efidev/efirt.c:r221273-222812,222815-223757 Reverse-merged /user/ngie/bsnmp_cleanup/sys/ia64/ia64/efi.c:r295193 Reverse-merged 
Index: projects/clang700-import/usr.bin/stat/stat.c
===================================================================
--- projects/clang700-import/usr.bin/stat/stat.c	(revision 337615)
+++ projects/clang700-import/usr.bin/stat/stat.c	(revision 337616)
@@ -1,1100 +1,1096 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
 *
 * Copyright (c) 2002 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Brown.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#if 0
#ifndef lint
__RCSID("$NetBSD: stat.c,v 1.33 2011/01/15 22:54:10 njoly Exp $"
"$OpenBSD: stat.c,v 1.14 2009/06/24 09:44:25 sobrado Exp $");
#endif
#endif

__FBSDID("$FreeBSD$");

#if HAVE_CONFIG_H
#include "config.h"
#else /* HAVE_CONFIG_H */
#define HAVE_STRUCT_STAT_ST_FLAGS 1
#define HAVE_STRUCT_STAT_ST_GEN 1
#define HAVE_STRUCT_STAT_ST_BIRTHTIME 1
#define HAVE_STRUCT_STAT_ST_MTIMENSEC 1
#define HAVE_DEVNAME 1
#endif /* HAVE_CONFIG_H */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mount.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <grp.h>
#include <limits.h>
#include <paths.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#if HAVE_STRUCT_STAT_ST_FLAGS
#define DEF_F "%#Xf "
#define RAW_F "%f "
#define SHELL_F " st_flags=%f"
#else /* HAVE_STRUCT_STAT_ST_FLAGS */
#define DEF_F
#define RAW_F
#define SHELL_F
#endif /* HAVE_STRUCT_STAT_ST_FLAGS */

#if HAVE_STRUCT_STAT_ST_BIRTHTIME
#define DEF_B "\"%SB\" "
#define RAW_B "%B "
#define SHELL_B "st_birthtime=%B "
#else /* HAVE_STRUCT_STAT_ST_BIRTHTIME */
#define DEF_B
#define RAW_B
#define SHELL_B
#endif /* HAVE_STRUCT_STAT_ST_BIRTHTIME */

#if HAVE_STRUCT_STAT_ST_ATIM
#define st_atimespec st_atim
#define st_ctimespec st_ctim
#define st_mtimespec st_mtim
#endif /* HAVE_STRUCT_STAT_ST_ATIM */

#define DEF_FORMAT \
	"%d %i %Sp %l %Su %Sg %r %z \"%Sa\" \"%Sm\" \"%Sc\" " DEF_B \
	"%k %b " DEF_F "%N"
#define RAW_FORMAT	"%d %i %#p %l %u %g %r %z %a %m %c " RAW_B \
	"%k %b " RAW_F "%N"
#define LS_FORMAT	"%Sp %l %Su %Sg %Z %Sm %N%SY"
#define LSF_FORMAT	"%Sp %l %Su %Sg %Z %Sm %N%T%SY"
#define SHELL_FORMAT \
	"st_dev=%d st_ino=%i st_mode=%#p st_nlink=%l " \
	"st_uid=%u st_gid=%g st_rdev=%r st_size=%z " \
	"st_atime=%a st_mtime=%m st_ctime=%c " SHELL_B \
	"st_blksize=%k st_blocks=%b" SHELL_F
#define LINUX_FORMAT \
	" File: \"%N\"%n" \
	" Size: %-11z FileType: %HT%n" \
	" Mode: (%OMp%03OLp/%.10Sp) Uid: (%5u/%8Su) Gid: (%5g/%8Sg)%n" \
	"Device: %Hd,%Ld Inode: %i Links: %l%n" \
	"Access: %Sa%n" \
	"Modify: %Sm%n" \
	"Change: %Sc"

#define TIME_FORMAT	"%b %e %T %Y"
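
/*
 * Added illustration (an annotation, not part of the original source):
 * the composite formats above back stat(1)'s option letters, e.g.
 *
 *	stat -r file		RAW_FORMAT, numeric fields only
 *	stat -x file		LINUX_FORMAT, a Linux-style "File:" block
 *	eval $(stat -s file)	SHELL_FORMAT, sets st_size, st_mode, etc.
 *				in the calling shell
 *
 * With no format option at all, DEF_FORMAT is used.
 */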
#define FLAG_POUND 0x01
#define FLAG_SPACE 0x02
#define FLAG_PLUS 0x04
#define FLAG_ZERO 0x08
#define FLAG_MINUS 0x10

/*
 * These format characters must all be unique, except the magic one.
 */
#define FMT_MAGIC '%'
#define FMT_DOT '.'

#define SIMPLE_NEWLINE 'n'
#define SIMPLE_TAB 't'
#define SIMPLE_PERCENT '%'
#define SIMPLE_NUMBER '@'

#define FMT_POUND '#'
#define FMT_SPACE ' '
#define FMT_PLUS '+'
#define FMT_ZERO '0'
#define FMT_MINUS '-'

#define FMT_DECIMAL 'D'
#define FMT_OCTAL 'O'
#define FMT_UNSIGNED 'U'
#define FMT_HEX 'X'
#define FMT_FLOAT 'F'
#define FMT_STRING 'S'

#define FMTF_DECIMAL 0x01
#define FMTF_OCTAL 0x02
#define FMTF_UNSIGNED 0x04
#define FMTF_HEX 0x08
#define FMTF_FLOAT 0x10
#define FMTF_STRING 0x20

#define HIGH_PIECE 'H'
#define MIDDLE_PIECE 'M'
#define LOW_PIECE 'L'

#define SHOW_realpath 'R'
#define SHOW_st_dev 'd'
#define SHOW_st_ino 'i'
#define SHOW_st_mode 'p'
#define SHOW_st_nlink 'l'
#define SHOW_st_uid 'u'
#define SHOW_st_gid 'g'
#define SHOW_st_rdev 'r'
#define SHOW_st_atime 'a'
#define SHOW_st_mtime 'm'
#define SHOW_st_ctime 'c'
#define SHOW_st_btime 'B'
#define SHOW_st_size 'z'
#define SHOW_st_blocks 'b'
#define SHOW_st_blksize 'k'
#define SHOW_st_flags 'f'
#define SHOW_st_gen 'v'
#define SHOW_symlink 'Y'
#define SHOW_filetype 'T'
#define SHOW_filename 'N'
#define SHOW_sizerdev 'Z'

void usage(const char *);
void output(const struct stat *, const char *, const char *, int, int);
int format1(const struct stat *,	/* stat info */
    const char *,			/* the file name */
    const char *, int,			/* the format string itself */
    char *, size_t,			/* a place to put the output */
    int, int, int, int,			/* the parsed format */
    int, int);
int hex2byte(const char [2]);
#if HAVE_STRUCT_STAT_ST_FLAGS
char *xfflagstostr(unsigned long);
#endif

static const char *timefmt;
static int linkfail;

#define addchar(s, c, nl) \
    do { \
        (void)fputc((c), (s)); \
        (*nl) = ((c) == '\n'); \
    } while (0/*CONSTCOND*/)
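main() below gives the binary two personalities: installed under the name readlink it recognizes itself via getprogname(3) and locks the format to "%Y". A minimal sketch of the name-dispatch idiom, assuming a BSD libc that declares getprogname() in stdlib.h:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
    /* One binary, two behaviors, selected by its install name. */
    if (strcmp(getprogname(), "readlink") == 0)
        puts("acting as readlink(1)");
    else
        puts("acting as stat(1)");
    return (0);
}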
int
main(int argc, char *argv[])
{
    struct stat st;
    int ch, rc, errs, am_readlink;
    int lsF, fmtchar, usestat, nfs_handle, fn, nonl, quiet;
    const char *statfmt, *options, *synopsis;
    char dname[sizeof _PATH_DEV + SPECNAMELEN] = _PATH_DEV;
    fhandle_t fhnd;
    const char *file;

    am_readlink = 0;
    lsF = 0;
    fmtchar = '\0';
    usestat = 0;
    nfs_handle = 0;
    nonl = 0;
    quiet = 0;
    linkfail = 0;
    statfmt = NULL;
    timefmt = NULL;

    if (strcmp(getprogname(), "readlink") == 0) {
        am_readlink = 1;
        options = "fn";
        synopsis = "[-fn] [file ...]";
        statfmt = "%Y";
        fmtchar = 'f';
        quiet = 1;
    } else {
        options = "f:FHlLnqrst:x";
        synopsis = "[-FLnq] [-f format | -l | -r | -s | -x] "
            "[-t timefmt] [file|handle ...]";
    }

    while ((ch = getopt(argc, argv, options)) != -1)
        switch (ch) {
        case 'F':
            lsF = 1;
            break;
        case 'H':
            nfs_handle = 1;
            break;
        case 'L':
            usestat = 1;
            break;
        case 'n':
            nonl = 1;
            break;
        case 'q':
            quiet = 1;
            break;
        case 'f':
            if (am_readlink) {
                statfmt = "%R";
                break;
            }
            statfmt = optarg;
            /* FALLTHROUGH */
        case 'l':
        case 'r':
        case 's':
        case 'x':
            if (fmtchar != 0)
                errx(1, "can't use format '%c' with '%c'",
                    fmtchar, ch);
            fmtchar = ch;
            break;
        case 't':
            timefmt = optarg;
            break;
        default:
            usage(synopsis);
        }

    argc -= optind;
    argv += optind;
    fn = 1;

    if (fmtchar == '\0') {
        if (lsF)
            fmtchar = 'l';
        else {
            fmtchar = 'f';
            statfmt = DEF_FORMAT;
        }
    }

    if (lsF && fmtchar != 'l')
        errx(1, "can't use format '%c' with -F", fmtchar);

    switch (fmtchar) {
    case 'f':
        /* statfmt already set */
        break;
    case 'l':
        statfmt = lsF ? LSF_FORMAT : LS_FORMAT;
        break;
    case 'r':
        statfmt = RAW_FORMAT;
        break;
    case 's':
        statfmt = SHELL_FORMAT;
        break;
    case 'x':
        statfmt = LINUX_FORMAT;
        if (timefmt == NULL)
            timefmt = "%c";
        break;
    default:
        usage(synopsis);
        /*NOTREACHED*/
    }

    if (timefmt == NULL)
        timefmt = TIME_FORMAT;

    errs = 0;
    do {
        if (argc == 0) {
            if (fdevname_r(STDIN_FILENO, dname +
                sizeof _PATH_DEV - 1, SPECNAMELEN) != NULL)
                file = dname;
            else
                file = "(stdin)";
            rc = fstat(STDIN_FILENO, &st);
        } else {
            int j;

            file = argv[0];
            if (nfs_handle) {
                rc = 0;
                bzero(&fhnd, sizeof(fhnd));
                j = MIN(2 * sizeof(fhnd), strlen(file));
                if ((j & 1) != 0) {
                    rc = -1;
                } else {
                    while (j) {
                        rc = hex2byte(&file[j - 2]);
                        if (rc == -1)
                            break;
                        ((char*) &fhnd)[j / 2 - 1] = rc;
                        j -= 2;
                    }
                }
                if (rc == -1)
                    errno = EINVAL;
                else
                    rc = fhstat(&fhnd, &st);
            } else if (usestat) {
                /*
                 * Try stat() and if it fails, fall back to
                 * lstat() just in case we're examining a
                 * broken symlink.
                 */
                if ((rc = stat(file, &st)) == -1 &&
                    errno == ENOENT &&
                    (rc = lstat(file, &st)) == -1)
                    errno = ENOENT;
            } else
                rc = lstat(file, &st);
        }

        if (rc == -1) {
            errs = 1;
            linkfail = 1;
            if (!quiet)
                warn("%s: stat", file);
        } else
            output(&st, file, statfmt, fn, nonl);

        argv++;
        argc--;
        fn++;
    } while (argc > 0);

    return (am_readlink ? linkfail : errs);
}

#if HAVE_STRUCT_STAT_ST_FLAGS
/*
 * fflagstostr() wrapper that leaks only once
 */
char *
xfflagstostr(unsigned long fflags)
{
    static char *str = NULL;

    if (str != NULL)
        free(str);

    str = fflagstostr(fflags);
    if (str == NULL)
        err(1, "fflagstostr");
    return (str);
}
#endif /* HAVE_STRUCT_STAT_ST_FLAGS */

void
usage(const char *synopsis)
{

    (void)fprintf(stderr, "usage: %s %s\n", getprogname(), synopsis);
    exit(1);
}
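The only functional change in this revision's hunks lives in format1() further below: user and group names are now resolved through the caching pwcache(3) lookups user_from_uid(3) and group_from_gid(3) instead of fresh getpwuid(3)/getgrgid(3) calls for every file. Passing a nonzero second argument makes the lookup return NULL for an unknown ID, which is why the "(%ld)" fallback branch survives the change. A minimal standalone sketch of the same pattern (the "/etc/passwd" path is only an arbitrary example):

#include <sys/types.h>
#include <sys/stat.h>
#include <grp.h>
#include <pwd.h>
#include <stdio.h>

int
main(void)
{
    struct stat st;
    const char *u, *g;

    if (stat("/etc/passwd", &st) == -1)
        return (1);
    /* Nonzero second argument: return NULL rather than a numeric string. */
    u = user_from_uid(st.st_uid, 1);
    g = group_from_gid(st.st_gid, 1);
    printf("owner=%s group=%s\n", u ? u : "(unknown)",
        g ? g : "(unknown)");
    return (0);
}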
/*
 * Parses a format string.
 */
void
output(const struct stat *st, const char *file,
    const char *statfmt, int fn, int nonl)
{
    int flags, size, prec, ofmt, hilo, what;
    char buf[PATH_MAX + 4 + 1];
    const char *subfmt;
    int nl, t, i;

    nl = 1;
    while (*statfmt != '\0') {

        /*
         * Non-format characters go straight out.
         */
        if (*statfmt != FMT_MAGIC) {
            addchar(stdout, *statfmt, &nl);
            statfmt++;
            continue;
        }

        /*
         * The current format "substring" starts here,
         * and then we skip the magic.
         */
        subfmt = statfmt;
        statfmt++;

        /*
         * Some simple one-character "formats".
         */
        switch (*statfmt) {
        case SIMPLE_NEWLINE:
            addchar(stdout, '\n', &nl);
            statfmt++;
            continue;
        case SIMPLE_TAB:
            addchar(stdout, '\t', &nl);
            statfmt++;
            continue;
        case SIMPLE_PERCENT:
            addchar(stdout, '%', &nl);
            statfmt++;
            continue;
        case SIMPLE_NUMBER: {
            char num[12], *p;

            snprintf(num, sizeof(num), "%d", fn);
            for (p = &num[0]; *p; p++)
                addchar(stdout, *p, &nl);
            statfmt++;
            continue;
        }
        }

        /*
         * This must be an actual format string. Format strings are
         * similar to printf(3) formats up to a point, and are of
         * the form:
         *
         *	%	required start of format
         *	[-# +0]	opt. format characters
         *	size	opt. field width
         *	.	opt. decimal separator, followed by
         *	prec	opt. precision
         *	fmt	opt. output specifier (string, numeric, etc.)
         *	sub	opt. sub field specifier (high, middle, low)
         *	datum	required field specifier (size, mode, etc)
         *
         * Only the % and the datum selector are required. All data
         * have reasonable default output forms. The "sub" specifier
         * only applies to certain data (mode, dev, rdev, filetype).
         * The symlink output defaults to STRING, yet will only emit
         * the leading " -> " if STRING is explicitly specified. The
         * sizerdev datum will generate rdev output for character or
         * block devices, and size output for all others.
         */
        flags = 0;
        do {
            if      (*statfmt == FMT_POUND)
                flags |= FLAG_POUND;
            else if (*statfmt == FMT_SPACE)
                flags |= FLAG_SPACE;
            else if (*statfmt == FMT_PLUS)
                flags |= FLAG_PLUS;
            else if (*statfmt == FMT_ZERO)
                flags |= FLAG_ZERO;
            else if (*statfmt == FMT_MINUS)
                flags |= FLAG_MINUS;
            else
                break;
            statfmt++;
        } while (1/*CONSTCOND*/);

        size = -1;
        if (isdigit((unsigned)*statfmt)) {
            size = 0;
            while (isdigit((unsigned)*statfmt)) {
                size = (size * 10) + (*statfmt - '0');
                statfmt++;
                if (size < 0)
                    goto badfmt;
            }
        }

        prec = -1;
        if (*statfmt == FMT_DOT) {
            statfmt++;

            prec = 0;
            while (isdigit((unsigned)*statfmt)) {
                prec = (prec * 10) + (*statfmt - '0');
                statfmt++;
                if (prec < 0)
                    goto badfmt;
            }
        }

#define fmtcase(x, y)		case (y): (x) = (y); statfmt++; break
#define fmtcasef(x, y, z)	case (y): (x) = (z); statfmt++; break
        switch (*statfmt) {
            fmtcasef(ofmt, FMT_DECIMAL, FMTF_DECIMAL);
            fmtcasef(ofmt, FMT_OCTAL, FMTF_OCTAL);
            fmtcasef(ofmt, FMT_UNSIGNED, FMTF_UNSIGNED);
            fmtcasef(ofmt, FMT_HEX, FMTF_HEX);
            fmtcasef(ofmt, FMT_FLOAT, FMTF_FLOAT);
            fmtcasef(ofmt, FMT_STRING, FMTF_STRING);
        default:
            ofmt = 0;
            break;
        }

        switch (*statfmt) {
            fmtcase(hilo, HIGH_PIECE);
            fmtcase(hilo, MIDDLE_PIECE);
            fmtcase(hilo, LOW_PIECE);
        default:
            hilo = 0;
            break;
        }

        switch (*statfmt) {
            fmtcase(what, SHOW_realpath);
            fmtcase(what, SHOW_st_dev);
            fmtcase(what, SHOW_st_ino);
            fmtcase(what, SHOW_st_mode);
            fmtcase(what, SHOW_st_nlink);
            fmtcase(what, SHOW_st_uid);
            fmtcase(what, SHOW_st_gid);
            fmtcase(what, SHOW_st_rdev);
            fmtcase(what, SHOW_st_atime);
            fmtcase(what, SHOW_st_mtime);
            fmtcase(what, SHOW_st_ctime);
            fmtcase(what, SHOW_st_btime);
            fmtcase(what, SHOW_st_size);
            fmtcase(what, SHOW_st_blocks);
            fmtcase(what, SHOW_st_blksize);
            fmtcase(what, SHOW_st_flags);
            fmtcase(what, SHOW_st_gen);
            fmtcase(what, SHOW_symlink);
            fmtcase(what, SHOW_filetype);
            fmtcase(what, SHOW_filename);
            fmtcase(what, SHOW_sizerdev);
        default:
            goto badfmt;
        }
#undef fmtcasef
#undef fmtcase

        t = format1(st,
            file,
            subfmt, statfmt - subfmt,
            buf, sizeof(buf),
            flags, size, prec, ofmt, hilo, what);

        for (i = 0; i < t && i < (int)(sizeof(buf) - 1); i++)
            addchar(stdout, buf[i], &nl);

        continue;

    badfmt:
        errx(1, "%.*s: bad format",
            (int)(statfmt - subfmt + 1), subfmt);
    }

    if (!nl && !nonl)
        (void)fputc('\n', stdout);
    (void)fflush(stdout);
}
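A worked example of the grammar documented in output() above: the format "%-8.3Sa" decomposes into flag '-', field width 8, precision 3, STRING output, and the st_atime datum. The sketch below is hypothetical and standalone (it keeps only the last flag character, where the real code ORs flag bits into a mask, and it omits the sub-field pass), but it scans the same fields in the same order:

#include <ctype.h>
#include <stdio.h>

int
main(void)
{
    const char *p = "-8.3Sa";           /* a %-style format, minus the '%' */
    int size = -1, prec = -1;
    char flag = 0, ofmt = 0;

    while (*p == '#' || *p == ' ' || *p == '+' || *p == '0' || *p == '-')
        flag = *p++;                    /* optional flag characters */
    while (isdigit((unsigned char)*p)) {        /* optional field width */
        size = (size == -1 ? 0 : size * 10) + (*p - '0');
        p++;
    }
    if (*p == '.')                      /* optional precision */
        for (p++, prec = 0; isdigit((unsigned char)*p); p++)
            prec = prec * 10 + (*p - '0');
    if (*p == 'D' || *p == 'O' || *p == 'U' || *p == 'X' ||
        *p == 'F' || *p == 'S')
        ofmt = *p++;                    /* optional output specifier */
    /* What remains is the required datum selector. */
    printf("flag=%c size=%d prec=%d ofmt=%c datum=%c\n",
        flag, size, prec, ofmt, *p);
    return (0);
}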
/*
 * Arranges output according to a single parsed format substring.
 */
int
format1(const struct stat *st,
    const char *file,
    const char *fmt, int flen,
    char *buf, size_t blen,
    int flags, int size, int prec, int ofmt,
    int hilo, int what)
{
    u_int64_t data;
    char *stmp, lfmt[24], tmp[20];
    const char *sdata;
    char smode[12], sid[12], path[PATH_MAX + 4];
-   struct passwd *pw;
-   struct group *gr;
    const struct timespec *tsp;
    struct timespec ts;
    struct tm *tm;
    int l, small, formats;

    tsp = NULL;
    formats = 0;
    small = 0;

    /*
     * First, pick out the data and tweak it based on hilo or
     * specified output format (symlink output only).
     */
    switch (what) {
    case SHOW_st_dev:
    case SHOW_st_rdev:
        small = (sizeof(st->st_dev) == 4);
        data = (what == SHOW_st_dev) ? st->st_dev : st->st_rdev;
#if HAVE_DEVNAME
        sdata = (what == SHOW_st_dev) ?
            devname(st->st_dev, S_IFBLK) :
            devname(st->st_rdev,
                S_ISCHR(st->st_mode) ? S_IFCHR :
                S_ISBLK(st->st_mode) ? S_IFBLK :
                0U);
        if (sdata == NULL)
            sdata = "???";
#endif /* HAVE_DEVNAME */
        if (hilo == HIGH_PIECE) {
            data = major(data);
            hilo = 0;
        } else if (hilo == LOW_PIECE) {
            data = minor((unsigned)data);
            hilo = 0;
        }
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX |
#if HAVE_DEVNAME
            FMTF_STRING;
#else /* HAVE_DEVNAME */
            0;
#endif /* HAVE_DEVNAME */
        if (ofmt == 0)
            ofmt = FMTF_UNSIGNED;
        break;
    case SHOW_st_ino:
        small = (sizeof(st->st_ino) == 4);
        data = st->st_ino;
        sdata = NULL;
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX;
        if (ofmt == 0)
            ofmt = FMTF_UNSIGNED;
        break;
    case SHOW_st_mode:
        small = (sizeof(st->st_mode) == 4);
        data = st->st_mode;
        strmode(st->st_mode, smode);
        stmp = smode;
        l = strlen(stmp);
        if (stmp[l - 1] == ' ')
            stmp[--l] = '\0';
        if (hilo == HIGH_PIECE) {
            data >>= 12;
            stmp += 1;
            stmp[3] = '\0';
            hilo = 0;
        } else if (hilo == MIDDLE_PIECE) {
            data = (data >> 9) & 07;
            stmp += 4;
            stmp[3] = '\0';
            hilo = 0;
        } else if (hilo == LOW_PIECE) {
            data &= 0777;
            stmp += 7;
            stmp[3] = '\0';
            hilo = 0;
        }
        sdata = stmp;
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX | FMTF_STRING;
        if (ofmt == 0)
            ofmt = FMTF_OCTAL;
        break;
    case SHOW_st_nlink:
        small = (sizeof(st->st_dev) == 4);
        data = st->st_nlink;
        sdata = NULL;
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX;
        if (ofmt == 0)
            ofmt = FMTF_UNSIGNED;
        break;
    case SHOW_st_uid:
        small = (sizeof(st->st_uid) == 4);
        data = st->st_uid;
-       if ((pw = getpwuid(st->st_uid)) != NULL)
-           sdata = pw->pw_name;
-       else {
+       sdata = user_from_uid(st->st_uid, 1);
+       if (sdata == NULL) {
            snprintf(sid, sizeof(sid), "(%ld)", (long)st->st_uid);
            sdata = sid;
        }
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX | FMTF_STRING;
        if (ofmt == 0)
            ofmt = FMTF_UNSIGNED;
        break;
    case SHOW_st_gid:
        small = (sizeof(st->st_gid) == 4);
        data = st->st_gid;
-       if ((gr = getgrgid(st->st_gid)) != NULL)
-           sdata = gr->gr_name;
-       else {
+       sdata = group_from_gid(st->st_gid, 1);
+       if (sdata == NULL) {
            snprintf(sid, sizeof(sid), "(%ld)", (long)st->st_gid);
            sdata = sid;
        }
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX | FMTF_STRING;
        if (ofmt == 0)
            ofmt = FMTF_UNSIGNED;
        break;
    case SHOW_st_atime:
        tsp = &st->st_atimespec;
        /* FALLTHROUGH */
    case SHOW_st_mtime:
        if (tsp == NULL)
            tsp = &st->st_mtimespec;
        /* FALLTHROUGH */
    case SHOW_st_ctime:
        if (tsp == NULL)
            tsp = &st->st_ctimespec;
        /* FALLTHROUGH */
#if HAVE_STRUCT_STAT_ST_BIRTHTIME
    case SHOW_st_btime:
        if (tsp == NULL)
            tsp = &st->st_birthtimespec;
#endif /* HAVE_STRUCT_STAT_ST_BIRTHTIME */
        ts = *tsp;	/* copy so we can muck with it */
        small = (sizeof(ts.tv_sec) == 4);
        data = ts.tv_sec;
        tm = localtime(&ts.tv_sec);
        if (tm == NULL) {
            ts.tv_sec = 0;
            tm = localtime(&ts.tv_sec);
        }
        (void)strftime(path, sizeof(path), timefmt, tm);
        sdata = path;
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX | FMTF_FLOAT | FMTF_STRING;
        if (ofmt == 0)
            ofmt = FMTF_DECIMAL;
        break;
    case SHOW_st_size:
        small = (sizeof(st->st_size) == 4);
        data = st->st_size;
        sdata = NULL;
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX;
        if (ofmt == 0)
            ofmt = FMTF_UNSIGNED;
        break;
    case SHOW_st_blocks:
        small = (sizeof(st->st_blocks) == 4);
        data = st->st_blocks;
        sdata = NULL;
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX;
        if (ofmt == 0)
            ofmt = FMTF_UNSIGNED;
        break;
    case SHOW_st_blksize:
        small = (sizeof(st->st_blksize) == 4);
        data = st->st_blksize;
        sdata = NULL;
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX;
        if (ofmt == 0)
            ofmt = FMTF_UNSIGNED;
        break;
#if HAVE_STRUCT_STAT_ST_FLAGS
    case SHOW_st_flags:
        small = (sizeof(st->st_flags) == 4);
        data = st->st_flags;
        sdata = xfflagstostr(st->st_flags);
        if (*sdata == '\0')
            sdata = "-";
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX | FMTF_STRING;
        if (ofmt == 0)
            ofmt = FMTF_UNSIGNED;
        break;
#endif /* HAVE_STRUCT_STAT_ST_FLAGS */
#if HAVE_STRUCT_STAT_ST_GEN
    case SHOW_st_gen:
        small = (sizeof(st->st_gen) == 4);
        data = st->st_gen;
        sdata = NULL;
        formats = FMTF_DECIMAL | FMTF_OCTAL | FMTF_UNSIGNED |
            FMTF_HEX;
        if (ofmt == 0)
            ofmt = FMTF_UNSIGNED;
        break;
#endif /* HAVE_STRUCT_STAT_ST_GEN */
    case SHOW_realpath:
        small = 0;
        data = 0;
        if (file == NULL) {
            (void)strlcpy(path, "(stdin)", sizeof(path));
            sdata = path;
        } else {
            snprintf(path, sizeof(path), " -> ");
            if (realpath(file, path + 4) == NULL) {
                linkfail = 1;
                l = 0;
                path[0] = '\0';
            }
            sdata = path + (ofmt == FMTF_STRING ? 0 : 4);
        }
        formats = FMTF_STRING;
        if (ofmt == 0)
            ofmt = FMTF_STRING;
        break;
    case SHOW_symlink:
        small = 0;
        data = 0;
        if (S_ISLNK(st->st_mode)) {
            snprintf(path, sizeof(path), " -> ");
            l = readlink(file, path + 4, sizeof(path) - 4 - 1);
            if (l == -1) {
                linkfail = 1;
                l = 0;
                path[0] = '\0';
            }
            path[l + 4] = '\0';
            sdata = path + (ofmt == FMTF_STRING ? 0 : 4);
        } else {
            linkfail = 1;
            sdata = "";
        }
        formats = FMTF_STRING;
        if (ofmt == 0)
            ofmt = FMTF_STRING;
        break;
    case SHOW_filetype:
        small = 0;
        data = 0;
        sdata = "";
        if (hilo == 0 || hilo == LOW_PIECE) {
            switch (st->st_mode & S_IFMT) {
            case S_IFIFO:
                sdata = "|";
                break;
            case S_IFDIR:
                sdata = "/";
                break;
            case S_IFREG:
                if (st->st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))
                    sdata = "*";
                break;
            case S_IFLNK:
                sdata = "@";
                break;
            case S_IFSOCK:
                sdata = "=";
                break;
#ifdef S_IFWHT
            case S_IFWHT:
                sdata = "%";
                break;
#endif /* S_IFWHT */
#ifdef S_IFDOOR
            case S_IFDOOR:
                sdata = ">";
                break;
#endif /* S_IFDOOR */
            }
            hilo = 0;
        } else if (hilo == HIGH_PIECE) {
            switch (st->st_mode & S_IFMT) {
            case S_IFIFO:
                sdata = "Fifo File";
                break;
            case S_IFCHR:
                sdata = "Character Device";
                break;
            case S_IFDIR:
                sdata = "Directory";
                break;
            case S_IFBLK:
                sdata = "Block Device";
                break;
            case S_IFREG:
                sdata = "Regular File";
                break;
            case S_IFLNK:
                sdata = "Symbolic Link";
                break;
            case S_IFSOCK:
                sdata = "Socket";
                break;
#ifdef S_IFWHT
            case S_IFWHT:
                sdata = "Whiteout File";
                break;
#endif /* S_IFWHT */
#ifdef S_IFDOOR
            case S_IFDOOR:
                sdata = "Door";
                break;
#endif /* S_IFDOOR */
            default:
                sdata = "???";
                break;
            }
            hilo = 0;
        }
        formats = FMTF_STRING;
        if (ofmt == 0)
            ofmt = FMTF_STRING;
        break;
    case SHOW_filename:
        small = 0;
        data = 0;
        (void)strlcpy(path, file, sizeof(path));
        sdata = path;
        formats = FMTF_STRING;
        if (ofmt == 0)
            ofmt = FMTF_STRING;
        break;
    case SHOW_sizerdev:
        if (S_ISCHR(st->st_mode) || S_ISBLK(st->st_mode)) {
            char majdev[20], mindev[20];
            int l1, l2;

            l1 = format1(st, file, fmt, flen,
                majdev, sizeof(majdev), flags, size, prec,
                ofmt, HIGH_PIECE, SHOW_st_rdev);
            l2 = format1(st, file, fmt, flen,
                mindev, sizeof(mindev), flags, size, prec,
                ofmt, LOW_PIECE, SHOW_st_rdev);
            return (snprintf(buf, blen, "%.*s,%.*s",
                l1, majdev, l2, mindev));
        } else {
            return (format1(st, file, fmt, flen, buf, blen,
                flags, size, prec, ofmt, 0, SHOW_st_size));
        }
        /*NOTREACHED*/
    default:
        errx(1, "%.*s: bad format", (int)flen, fmt);
    }

    /*
     * If a subdatum was specified but not supported, or an output
     * format was selected that is not supported, that's an error.
     */
    if (hilo != 0 || (ofmt & formats) == 0)
        errx(1, "%.*s: bad format", (int)flen, fmt);

    /*
     * Assemble the format string for passing to printf(3).
     */
    lfmt[0] = '\0';
    (void)strcat(lfmt, "%");
    if (flags & FLAG_POUND)
        (void)strcat(lfmt, "#");
    if (flags & FLAG_SPACE)
        (void)strcat(lfmt, " ");
    if (flags & FLAG_PLUS)
        (void)strcat(lfmt, "+");
    if (flags & FLAG_MINUS)
        (void)strcat(lfmt, "-");
    if (flags & FLAG_ZERO)
        (void)strcat(lfmt, "0");

    /*
     * Only the timespecs support the FLOAT output format, and that
     * requires work that differs from the other formats.
     */
    if (ofmt == FMTF_FLOAT) {
        /*
         * Nothing after the decimal point, so just print seconds.
         */
        if (prec == 0) {
            if (size != -1) {
                (void)snprintf(tmp, sizeof(tmp), "%d", size);
                (void)strcat(lfmt, tmp);
            }
            (void)strcat(lfmt, "lld");
            return (snprintf(buf, blen, lfmt,
                (long long)ts.tv_sec));
        }

        /*
         * Unspecified precision gets all the precision we have:
         * 9 digits.
         */
        if (prec == -1)
            prec = 9;

        /*
         * Adjust the size for the decimal point and the digits
         * that will follow.
         */
        size -= prec + 1;

        /*
         * Any leftover size that's legitimate will be used.
         */
        if (size > 0) {
            (void)snprintf(tmp, sizeof(tmp), "%d", size);
            (void)strcat(lfmt, tmp);
        }

        /* Seconds: time_t cast to long long. */
        (void)strcat(lfmt, "lld");

        /*
         * The stuff after the decimal point always needs zero
         * filling.
         */
        (void)strcat(lfmt, ".%0");

        /*
         * We can "print" at most nine digits of precision. The
         * rest we will pad on at the end.
         *
         * Nanoseconds: long.
         */
        (void)snprintf(tmp, sizeof(tmp), "%dld", MIN(prec, 9));
        (void)strcat(lfmt, tmp);

        /*
         * For precision of less that nine digits, trim off the
         * less significant figures.
         */
        for (; prec < 9; prec++)
            ts.tv_nsec /= 10;

        /*
         * Use the format, and then tack on any zeroes that
         * might be required to make up the requested precision.
         */
        l = snprintf(buf, blen, lfmt, (long long)ts.tv_sec,
            ts.tv_nsec);
        for (; prec > 9 && l < (int)blen; prec--, l++)
            (void)strcat(buf, "0");
        return (l);
    }

    /*
     * Add on size and precision, if specified, to the format.
     */
    if (size != -1) {
        (void)snprintf(tmp, sizeof(tmp), "%d", size);
        (void)strcat(lfmt, tmp);
    }
    if (prec != -1) {
        (void)snprintf(tmp, sizeof(tmp), ".%d", prec);
        (void)strcat(lfmt, tmp);
    }

    /*
     * String output uses the temporary sdata.
     */
    if (ofmt == FMTF_STRING) {
        if (sdata == NULL)
            errx(1, "%.*s: bad format", (int)flen, fmt);
        (void)strcat(lfmt, "s");
        return (snprintf(buf, blen, lfmt, sdata));
    }

    /*
     * Ensure that sign extension does not cause bad looking output
     * for some forms.
     */
    if (small && ofmt != FMTF_DECIMAL)
        data = (u_int32_t)data;

    /*
     * The four "numeric" output forms.
     */
    (void)strcat(lfmt, "ll");
    switch (ofmt) {
    case FMTF_DECIMAL:
        (void)strcat(lfmt, "d");
        break;
    case FMTF_OCTAL:
        (void)strcat(lfmt, "o");
        break;
    case FMTF_UNSIGNED:
        (void)strcat(lfmt, "u");
        break;
    case FMTF_HEX:
        (void)strcat(lfmt, "x");
        break;
    }

    return (snprintf(buf, blen, lfmt, data));
}

#define hex2nibble(c) (c <= '9' ? c - '0' : toupper(c) - 'A' + 10)
int
hex2byte(const char c[2])
{
    if (!(ishexnumber(c[0]) && ishexnumber(c[1])))
        return -1;
    return (hex2nibble(c[0]) << 4) + hex2nibble(c[1]);
}

Index: projects/clang700-import
===================================================================
--- projects/clang700-import	(revision 337615)
+++ projects/clang700-import	(revision 337616)

Property changes on: projects/clang700-import
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r337591-337615