Index: cddl/contrib/opensolaris/lib/libdtrace/common/dt_subr.c =================================================================== --- cddl/contrib/opensolaris/lib/libdtrace/common/dt_subr.c +++ cddl/contrib/opensolaris/lib/libdtrace/common/dt_subr.c @@ -581,6 +581,7 @@ dt_printf(dtrace_hdl_t *dtp, FILE *fp, const char *format, ...) { va_list ap; + va_list ap2; int n; #ifndef illumos @@ -640,11 +641,14 @@ dtp->dt_buffered_buf[0] = '\0'; } - if ((needed = vsnprintf(NULL, 0, format, ap)) < 0) { + va_copy(ap2, ap); + if ((needed = vsnprintf(NULL, 0, format, ap2)) < 0) { rval = dt_set_errno(dtp, errno); + va_end(ap2); va_end(ap); return (rval); } + va_end(ap2); if (needed == 0) { va_end(ap); Index: share/dtrace/dsiftr.d =================================================================== --- /dev/null +++ share/dtrace/dsiftr.d @@ -0,0 +1,252 @@ +#!/usr/sbin/dtrace -s +/* + dsiftr.d, v0.1 (26 October 2014) + + # Copyright (c) 2014 Centre for Advanced Internet Architectures, + # Swinburne University of Technology. All rights reserved. + # + # Author: Grenville Armitage (garmitage@swin.edu.au) + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: + # 1. Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # 2. Redistributions in binary form must reproduce the above copyright + # notice, this list of conditions and the following disclaimer in the + # documentation and/or other materials provided with the distribution. + # + # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + # SUCH DAMAGE. + + Rudimentary SIFTR-like logging under FreeBSD dtrace. Produces output + to stdout that emulates the /var/log/siftr.log file generated by siftr(8) + + Usage: ./dsiftr.d "" + + Launch this script with explicit IP addr string in w.x.y.z form + as 1st parameter, or "". Packets with that remote IP addr will + be excluded from logging (useful when debugging). Use "" to log + all packets. + + When the dtrace process receives SIGTERM it executes the dtrace:::END + probe (which prints siftr-like "disable" line to stdout) and exits. + +*/ + +/* Keep Dtrace quiet */ +#pragma D option quiet +/* Optionally allow the script to use system() call */ +/* #pragma D option destructive */ + +/* Various magic constants */ +inline string DSIFTR_VERSION="dsiftr0.1"; +inline int TCP_RTT_SCALE=32; /* Can we extract this from running kernel? */ +inline int IP_VERSION=4; /* We're only reporting IPv4 information */ +inline int KERN_HZ=`hz; /* Put ` in comment so editor (e.g. KWrite) parsing is kicked back into sync */ +inline int TF_SACK_PERMIT=0x000200; /* Borrowed from tcp_var.h. "other side said I could SACK" */ + + + +/* ************ Begin SIFTR emulation ************/ + +/* Generate first line of SIFTR-like log output */ +dtrace:::BEGIN +{ + /* printf("Running as script named %s\n", $0); */ + + /* Grab IP addr from command line to exclude from logging (if any) */ + EXCLUDE_IP = $$1 != "" ? $$1 : ""; + /* printf("Excluding IP addr %s\n", EXCLUDE_IP); */ + + /* Initialise global packet counts */ + out_pkt_count = 0; + in_pkt_count = 0; + + /* + 'walltimestamp' is an absolute time clock since 1jan1970, updated too slowly (every + kernel tick) to be used directly for SIFTR-like, per-packet absolute timestamps. + */ + wall_usec = walltimestamp/1000; + /* + 'timestamp' is a relative timer updated much more frequently than 'walltimestamp'. + We'll use 'timestamp' every time a probe fires, adjusted according to the difference + to absolute time when this dtrace script first begins logging. + */ + start_timestamp_delta = (timestamp/1000) - wall_usec; + wall_sec = wall_usec/1000000; + + /* + NOTE: Using walltimestamp to construct the base (start) time means all timestamps + (enable_time and per-packet events) will have the same, fixed error of up to one + tick relative to 'reality', depending when the Dtrace scripts statrs relative + to the most recent update of walltimestamp. + */ + + prev_wallusec = wall_usec; /* Initialise relative time count for internal debugging*/ + + /* Mimic SIFTR's first line of the following form: + enable_time_secs=1413330870 enable_time_usecs=707345 siftrver=1.2.4 hz=1000 tcp_rtt_scale=32 sysname=FreeBSD sysver=902001 ipmode=4 + */ + printf("enable_time_secs=%d enable_time_usecs=%d siftrver=%s hz=%d tcp_rtt_scale=%d sysname=%s sysver=%d ipmode=%d\n", + wall_sec, + wall_usec % 1000000, + DSIFTR_VERSION, + KERN_HZ, + TCP_RTT_SCALE, + `ostype, /* Put ` in comment so editor (e.g. KWrite) parsing is kicked back into sync */ + `osreldate, /* Put ` in comment so editor (e.g. KWrite) parsing is kicked back into sync */ + IP_VERSION); +} + +/* ****** Emulate SIFTR's "o" lines (output events) *****/ +tcp:::send +/ ( args[2]->ip_ver == IP_VERSION ) && (args[3]->tcps_addr != 0) && ( args[3]->tcps_raddr != EXCLUDE_IP ) / +{ + printf("o,"); + out_pkt_count++; +} + +/* ****** Emulate SIFTR's "i" lines (input events) *****/ +tcp:::receive +/ ( args[2]->ip_ver == IP_VERSION ) && (args[3]->tcps_addr != 0) && ( args[3]->tcps_raddr != EXCLUDE_IP ) / +{ + printf("i,"); + in_pkt_count++; +} + +/* ****** Print the rest of each "o" or "i" SIFTR log line *****/ +tcp:::send,tcp:::receive +/ ( args[2]->ip_ver == IP_VERSION ) && (args[3]->tcps_addr != 0) && ( args[3]->tcps_raddr != EXCLUDE_IP ) / +{ + /* + Direct inspection of the tcpcb to extract some fields not currently + exposed in args[3] the way we might like. + */ + this_tcpcb = (struct tcpcb *)args[3]->tcps_addr; + /* (tcpcb)->t_flags is not currently exposed in args[3] by FreeBSD's tcp.d */ + this_tcpcb_t_flags = this_tcpcb == NULL ? 0 : this_tcpcb->t_flags; + /* (tcpcb)->t_segqlen is not currently exposed in args[3] by FreeBSD's tcp.d */ + this_tcpcb_t_segqlen = this_tcpcb == NULL ? -1 : this_tcpcb->t_segqlen; + /* (tcpcb)->t_snd_max is not currently exposed in args[3] by FreeBSD's tcp.d */ + this_tcpcb_t_snd_max = this_tcpcb == NULL ? -1 : this_tcpcb->snd_max; + /* args[3]->tcps_rto is a scaled form of (tcpcb)->t_rxtcur, so why not grab t_rxtcur directly? */ + this_tcpcb_t_rxtcur = this_tcpcb == NULL ? -1 : this_tcpcb->t_rxtcur; + + /* Locate inpcb via the tcpcb */ + this_inpcb = this_tcpcb->t_inpcb; + + /* Calculate high speed wall clock timestamp from 'timestamp' */ + wall_usec = (timestamp/1000) - start_timestamp_delta; + wall_sec = wall_usec/1000000; + + /* Try to narrow down some timing issues */ + /* printf(" %d ", wall_usec - prev_wallusec); + @usec_diffs = lquantize(wall_usec - prev_wallusec,0,200000,1000); + prev_wallusec = wall_usec; */ + + /* Print the rest of the fake SIFTR line */ + printf("0x%x,%ld.%l06d,%s,%d,%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n", + 0, /* Dummy packet hash */ + wall_sec, + wall_usec % 1000000, + args[3]->tcps_laddr, + args[3]->tcps_lport, /* SIFTR uses inp->inp_lport */ + args[3]->tcps_raddr, + args[3]->tcps_rport, /* SIFTR uses inp->inp_fport */ + + args[3]->tcps_cwnd_ssthresh, + args[3]->tcps_cwnd, + 0, /* snd_bwnd, unused SIFTR field, kept for compat */ + args[3]->tcps_swnd, + args[3]->tcps_rwnd, + args[3]->tcps_snd_ws, + args[3]->tcps_rcv_ws, + args[3]->tcps_state, + + args[3]->tcps_mss, + args[3]->tcps_srtt, /* Requires tcp.d patched to expose (tcpcb)->t_srtt */ + + (this_tcpcb_t_flags & TF_SACK_PERMIT) != 0 ? 1 : 0, /* sack_enabled, SIFTR uses (tp->t_flags & TF_SACK_PERMIT) != 0 */ + this_tcpcb_t_flags, + + /* + NOTE: The next field requires tcps_rto calculation in /usr/src/cddl/lib/libdtrace/tcp.d + has been patched from: + tcps_rto = p == NULL ? -1 : p->t_rxtcur / 1000; + to: + tcps_rto = p == NULL ? -1 : (p->t_rxtcur * 1000) / `hz; + + SIFTR presents t_rxtcur as-is and calls it "rto in units of HZ", so we must reverse + the dtrace translator's calculation to get rto in ticks. + */ + /* (args[3]->tcps_rto * KERN_HZ)/1000, */ + + /* Alternative form -- extract the current RTO directly from tcpcb */ + this_tcpcb_t_rxtcur, + + this_inpcb->inp_socket->so_snd.sb_hiwat, /* snd_buf_hiwater, SIFTR uses inp->inp_socket->so_snd.sb_hiwat */ + this_inpcb->inp_socket->so_snd.sb_acc, /* snd_buf_cc, SIFTR uses inp->inp_socket->so_snd.sb_cc */ + this_inpcb->inp_socket->so_rcv.sb_hiwat, /* rcv_buf_hiwater, SIFTR uses inp->inp_socket->so_rcv.sb_hiwat */ + this_inpcb->inp_socket->so_rcv.sb_acc, /* rcv_buf_acc, SIFTR uses inp->inp_socket->so_rcv.sb_cc */ + (this_tcpcb_t_snd_max - args[3]->tcps_suna), /* sent_inflight_bytes, SIFTR uses (tp->snd_max - tp->snd_una) */ + this_tcpcb_t_segqlen /* SIFTR uses tp->t_segqlen */ + ); + + /* Update associative array of seen flows, used to create flow_list during dtrace:::END */ + @Allflows[args[3]->tcps_laddr, + args[3]->tcps_lport, + args[3]->tcps_raddr, + args[3]->tcps_rport] = count(); + +} + +/* Generate last line of SIFTR-like log output */ +dtrace:::END +{ + /* + Mimic SIFTR's disable line of the following form: + disable_time_secs=1413330955 disable_time_usecs=321501 num_inbound_tcp_pkts=2084 \ + num_outbound_tcp_pkts=3815 total_tcp_pkts=5899 num_inbound_skipped_pkts_malloc=0 \ + num_outbound_skipped_pkts_malloc=0 num_inbound_skipped_pkts_mtx=0 \ + num_outbound_skipped_pkts_mtx=0 num_inbound_skipped_pkts_tcpcb=0 \ + num_outbound_skipped_pkts_tcpcb=0 num_inbound_skipped_pkts_inpcb=6 \ + num_outbound_skipped_pkts_inpcb=3 total_skipped_tcp_pkts=9 \ + flow_list=10.1.1.24;22-10.1.1.11;27542,10.1.1.24;22-10.1.1.11;45498,172.16.11.3;81-172.16.10.2;29770,10.1.1.24;22-10.1.1.11;55703,10.1.1.24;22-10.1.1.11;55712, + */ + wall_usec = (timestamp/1000) - start_timestamp_delta; + wall_sec = wall_usec/1000000; + + printf("disable_time_secs=%d disable_time_usecs=%d", + wall_sec, + wall_usec % 1000000 + ); + + printf(" num_inbound_tcp_pkts=%d num_outbound_tcp_pkts=%d total_tcp_pkts=%d", + in_pkt_count, + out_pkt_count, + out_pkt_count + in_pkt_count + ); + + /* The rest of these fields are zero. Don't really apply to DTrace anyway. */ + printf(" num_inbound_skipped_pkts_malloc=0 num_outbound_skipped_pkts_malloc=0 num_inbound_skipped_pkts_mtx=0 num_outbound_skipped_pkts_mtx=0 num_inbound_skipped_pkts_tcpcb=0 num_outbound_skipped_pkts_tcpcb=0 num_inbound_skipped_pkts_inpcb=0 num_outbound_skipped_pkts_inpcb=0 total_skipped_tcp_pkts=0"); + + /* Print the keys (seen flows) in the associative array Allflows. */ + printf(" flow_list="); + printa("%s;%d-%s;%d,", @Allflows); + printf("\n"); + + /* For narrowing down some timing issues */ + /* printa("%@u\n", @usec_diffs); */ + +} +