Index: head/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c =================================================================== --- head/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c (revision 285703) +++ head/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c (revision 285704) @@ -1,1933 +1,1933 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef illumos #define GETOPT_EOF EOF #else #include #include #define mergesort(a, b, c, d) lsmergesort(a, b, c, d) #define GETOPT_EOF (-1) typedef uintptr_t pc_t; #endif #define LOCKSTAT_OPTSTR "x:bths:n:d:i:l:f:e:ckwWgCHEATID:RpPo:V" #define LS_MAX_STACK_DEPTH 50 #define LS_MAX_EVENTS 64 typedef struct lsrec { struct lsrec *ls_next; /* next in hash chain */ uintptr_t ls_lock; /* lock address */ uintptr_t ls_caller; /* caller address */ uint32_t ls_count; /* cumulative event count */ uint32_t ls_event; /* type of event */ uintptr_t ls_refcnt; /* cumulative reference count */ uint64_t ls_time; /* cumulative event duration */ uint32_t ls_hist[64]; /* log2(duration) histogram */ uintptr_t ls_stack[LS_MAX_STACK_DEPTH]; } lsrec_t; typedef struct lsdata { struct lsrec *lsd_next; /* next available */ int lsd_count; /* number of records */ } lsdata_t; /* * Definitions for the types of experiments which can be run. They are * listed in increasing order of memory cost and processing time cost. * The numerical value of each type is the number of bytes needed per record. 
*/ #define LS_BASIC offsetof(lsrec_t, ls_time) #define LS_TIME offsetof(lsrec_t, ls_hist[0]) #define LS_HIST offsetof(lsrec_t, ls_stack[0]) #define LS_STACK(depth) offsetof(lsrec_t, ls_stack[depth]) static void report_stats(FILE *, lsrec_t **, size_t, uint64_t, uint64_t); static void report_trace(FILE *, lsrec_t **); extern int symtab_init(void); extern char *addr_to_sym(uintptr_t, uintptr_t *, size_t *); extern uintptr_t sym_to_addr(char *name); extern size_t sym_size(char *name); extern char *strtok_r(char *, const char *, char **); #define DEFAULT_NRECS 10000 #define DEFAULT_HZ 97 #define MAX_HZ 1000 #define MIN_AGGSIZE (16 * 1024) #define MAX_AGGSIZE (32 * 1024 * 1024) static int g_stkdepth; static int g_topn = INT_MAX; static hrtime_t g_elapsed; static int g_rates = 0; static int g_pflag = 0; static int g_Pflag = 0; static int g_wflag = 0; static int g_Wflag = 0; static int g_cflag = 0; static int g_kflag = 0; static int g_gflag = 0; static int g_Vflag = 0; static int g_tracing = 0; static size_t g_recsize; static size_t g_nrecs; static int g_nrecs_used; static uchar_t g_enabled[LS_MAX_EVENTS]; static hrtime_t g_min_duration[LS_MAX_EVENTS]; static dtrace_hdl_t *g_dtp; static char *g_predicate; static char *g_ipredicate; static char *g_prog; static int g_proglen; static int g_dropped; typedef struct ls_event_info { char ev_type; char ev_lhdr[20]; char ev_desc[80]; char ev_units[10]; char ev_name[DTRACE_NAMELEN]; char *ev_predicate; char *ev_acquire; } ls_event_info_t; static ls_event_info_t g_event_info[LS_MAX_EVENTS] = { { 'C', "Lock", "Adaptive mutex spin", "nsec", "lockstat:::adaptive-spin" }, { 'C', "Lock", "Adaptive mutex block", "nsec", "lockstat:::adaptive-block" }, { 'C', "Lock", "Spin lock spin", "nsec", "lockstat:::spin-spin" }, { 'C', "Lock", "Thread lock spin", "nsec", "lockstat:::thread-spin" }, { 'C', "Lock", "R/W writer blocked by writer", "nsec", "lockstat:::rw-block", "arg2 == 0 && arg3 == 1" }, { 'C', "Lock", "R/W writer blocked by readers", "nsec", "lockstat:::rw-block", "arg2 == 0 && arg3 == 0 && arg4" }, { 'C', "Lock", "R/W reader blocked by writer", "nsec", "lockstat:::rw-block", "arg2 != 0 && arg3 == 1" }, { 'C', "Lock", "R/W reader blocked by write wanted", "nsec", "lockstat:::rw-block", "arg2 != 0 && arg3 == 0 && arg4" }, { 'C', "Lock", "R/W writer spin on writer", "nsec", "lockstat:::rw-spin", "arg2 == 0 && arg3 == 1" }, { 'C', "Lock", "R/W writer spin on readers", "nsec", "lockstat:::rw-spin", "arg2 == 0 && arg3 == 0 && arg4" }, { 'C', "Lock", "R/W reader spin on writer", "nsec", "lockstat:::rw-spin", "arg2 != 0 && arg3 == 1" }, { 'C', "Lock", "R/W reader spin on write wanted", "nsec", "lockstat:::rw-spin", "arg2 != 0 && arg3 == 0 && arg4" }, { 'C', "Lock", "SX exclusive block", "nsec", "lockstat:::sx-block", "arg2 == 0" }, { 'C', "Lock", "SX shared block", "nsec", "lockstat:::sx-block", "arg2 != 0" }, { 'C', "Lock", "SX exclusive spin", "nsec", "lockstat:::sx-spin", "arg2 == 0" }, { 'C', "Lock", "SX shared spin", "nsec", "lockstat:::sx-spin", "arg2 != 0" }, { 'C', "Lock", "Unknown event (type 16)", "units" }, { 'C', "Lock", "Unknown event (type 17)", "units" }, { 'C', "Lock", "Unknown event (type 18)", "units" }, { 'C', "Lock", "Unknown event (type 19)", "units" }, { 'C', "Lock", "Unknown event (type 20)", "units" }, { 'C', "Lock", "Unknown event (type 21)", "units" }, { 'C', "Lock", "Unknown event (type 22)", "units" }, { 'C', "Lock", "Unknown event (type 23)", "units" }, { 'C', "Lock", "Unknown event (type 24)", "units" }, { 'C', "Lock", "Unknown event 
(type 25)", "units" }, { 'C', "Lock", "Unknown event (type 26)", "units" }, { 'C', "Lock", "Unknown event (type 27)", "units" }, { 'C', "Lock", "Unknown event (type 28)", "units" }, { 'C', "Lock", "Unknown event (type 29)", "units" }, { 'C', "Lock", "Unknown event (type 30)", "units" }, { 'C', "Lock", "Unknown event (type 31)", "units" }, { 'H', "Lock", "Adaptive mutex hold", "nsec", "lockstat:::adaptive-release", NULL, "lockstat:::adaptive-acquire" }, { 'H', "Lock", "Spin lock hold", "nsec", "lockstat:::spin-release", NULL, "lockstat:::spin-acquire" }, { 'H', "Lock", "R/W writer hold", "nsec", - "lockstat::rw_wunlock:rw-release", NULL, - "lockstat::rw_wlock:rw-acquire" }, + "lockstat:::rw-release", "arg1 == 0", + "lockstat:::rw-acquire" }, { 'H', "Lock", "R/W reader hold", "nsec", - "lockstat::rw_runlock:rw-release", NULL, - "lockstat::rw_rlock:rw-acquire" }, + "lockstat:::rw-release", "arg1 == 1", + "lockstat:::rw-acquire" }, { 'H', "Lock", "SX shared hold", "nsec", - "lockstat::sx_sunlock:sx-release", NULL, - "lockstat::sx_slock:sx-acquire" }, + "lockstat:::sx-release", "arg1 == 0", + "lockstat:::sx-acquire" }, { 'H', "Lock", "SX exclusive hold", "nsec", - "lockstat::sx_xunlock:sx-release", NULL, - "lockstat::sx_xlock:sx-acquire" }, + "lockstat:::sx-release", "arg1 == 1", + "lockstat:::sx-acquire" }, { 'H', "Lock", "Unknown event (type 38)", "units" }, { 'H', "Lock", "Unknown event (type 39)", "units" }, { 'H', "Lock", "Unknown event (type 40)", "units" }, { 'H', "Lock", "Unknown event (type 41)", "units" }, { 'H', "Lock", "Unknown event (type 42)", "units" }, { 'H', "Lock", "Unknown event (type 43)", "units" }, { 'H', "Lock", "Unknown event (type 44)", "units" }, { 'H', "Lock", "Unknown event (type 45)", "units" }, { 'H', "Lock", "Unknown event (type 46)", "units" }, { 'H', "Lock", "Unknown event (type 47)", "units" }, { 'H', "Lock", "Unknown event (type 48)", "units" }, { 'H', "Lock", "Unknown event (type 49)", "units" }, { 'H', "Lock", "Unknown event (type 50)", "units" }, { 'H', "Lock", "Unknown event (type 51)", "units" }, { 'H', "Lock", "Unknown event (type 52)", "units" }, { 'H', "Lock", "Unknown event (type 53)", "units" }, { 'H', "Lock", "Unknown event (type 54)", "units" }, { 'H', "Lock", "Unknown event (type 55)", "units" }, #ifdef illumos { 'I', "CPU+PIL", "Profiling interrupt", "nsec", #else { 'I', "CPU+Pri_Class", "Profiling interrupt", "nsec", #endif "profile:::profile-97", NULL }, { 'I', "Lock", "Unknown event (type 57)", "units" }, { 'I', "Lock", "Unknown event (type 58)", "units" }, { 'I', "Lock", "Unknown event (type 59)", "units" }, { 'E', "Lock", "Recursive lock entry detected", "(N/A)", "lockstat:::rw-release", NULL, "lockstat:::rw-acquire" }, { 'E', "Lock", "Lockstat enter failure", "(N/A)" }, { 'E', "Lock", "Lockstat exit failure", "nsec" }, { 'E', "Lock", "Lockstat record failure", "(N/A)" }, }; #ifndef illumos static char *g_pri_class[] = { "", "Intr", "RealT", "TShar", "Idle" }; #endif static void fail(int do_perror, const char *message, ...) { va_list args; int save_errno = errno; va_start(args, message); (void) fprintf(stderr, "lockstat: "); (void) vfprintf(stderr, message, args); va_end(args); if (do_perror) (void) fprintf(stderr, ": %s", strerror(save_errno)); (void) fprintf(stderr, "\n"); exit(2); } static void dfail(const char *message, ...) 
{ va_list args; va_start(args, message); (void) fprintf(stderr, "lockstat: "); (void) vfprintf(stderr, message, args); va_end(args); (void) fprintf(stderr, ": %s\n", dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); exit(2); } static void show_events(char event_type, char *desc) { int i, first = -1, last; for (i = 0; i < LS_MAX_EVENTS; i++) { ls_event_info_t *evp = &g_event_info[i]; if (evp->ev_type != event_type || strncmp(evp->ev_desc, "Unknown event", 13) == 0) continue; if (first == -1) first = i; last = i; } (void) fprintf(stderr, "\n%s events (lockstat -%c or lockstat -e %d-%d):\n\n", desc, event_type, first, last); for (i = first; i <= last; i++) (void) fprintf(stderr, "%4d = %s\n", i, g_event_info[i].ev_desc); } static void usage(void) { (void) fprintf(stderr, "Usage: lockstat [options] command [args]\n" "\nGeneral options:\n\n" " -V print the corresponding D program\n" "\nEvent selection options:\n\n" " -C watch contention events [on by default]\n" " -E watch error events [off by default]\n" " -H watch hold events [off by default]\n" " -I watch interrupt events [off by default]\n" " -A watch all lock events [equivalent to -CH]\n" " -e event_list only watch the specified events (shown below);\n" " is a comma-separated list of\n" " events or ranges of events, e.g. 1,4-7,35\n" " -i rate interrupt rate for -I [default: %d Hz]\n" "\nData gathering options:\n\n" " -b basic statistics (lock, caller, event count)\n" " -t timing for all events [default]\n" " -h histograms for event times\n" " -s depth stack traces deep\n" " -x opt[=val] enable or modify DTrace options\n" "\nData filtering options:\n\n" " -n nrecords maximum number of data records [default: %d]\n" " -l lock[,size] only watch , which can be specified as a\n" " symbolic name or hex address; defaults\n" " to the ELF symbol size if available, 1 if not\n" " -f func[,size] only watch events generated by \n" " -d duration only watch events longer than \n" " -T trace (rather than sample) events\n" "\nData reporting options:\n\n" " -c coalesce lock data for arrays like pse_mutex[]\n" " -k coalesce PCs within functions\n" " -g show total events generated by function\n" " -w wherever: don't distinguish events by caller\n" " -W whichever: don't distinguish events by lock\n" " -R display rates rather than counts\n" " -p parsable output format (awk(1)-friendly)\n" " -P sort lock data by (count * avg_time) product\n" " -D n only display top events of each type\n" " -o filename send output to \n", DEFAULT_HZ, DEFAULT_NRECS); show_events('C', "Contention"); show_events('H', "Hold-time"); show_events('I', "Interrupt"); show_events('E', "Error"); (void) fprintf(stderr, "\n"); exit(1); } static int lockcmp(lsrec_t *a, lsrec_t *b) { int i; if (a->ls_event < b->ls_event) return (-1); if (a->ls_event > b->ls_event) return (1); for (i = g_stkdepth - 1; i >= 0; i--) { if (a->ls_stack[i] < b->ls_stack[i]) return (-1); if (a->ls_stack[i] > b->ls_stack[i]) return (1); } if (a->ls_caller < b->ls_caller) return (-1); if (a->ls_caller > b->ls_caller) return (1); if (a->ls_lock < b->ls_lock) return (-1); if (a->ls_lock > b->ls_lock) return (1); return (0); } static int countcmp(lsrec_t *a, lsrec_t *b) { if (a->ls_event < b->ls_event) return (-1); if (a->ls_event > b->ls_event) return (1); return (b->ls_count - a->ls_count); } static int timecmp(lsrec_t *a, lsrec_t *b) { if (a->ls_event < b->ls_event) return (-1); if (a->ls_event > b->ls_event) return (1); if (a->ls_time < b->ls_time) return (1); if (a->ls_time > b->ls_time) return (-1); return (0); } static int 
lockcmp_anywhere(lsrec_t *a, lsrec_t *b) { if (a->ls_event < b->ls_event) return (-1); if (a->ls_event > b->ls_event) return (1); if (a->ls_lock < b->ls_lock) return (-1); if (a->ls_lock > b->ls_lock) return (1); return (0); } static int lock_and_count_cmp_anywhere(lsrec_t *a, lsrec_t *b) { if (a->ls_event < b->ls_event) return (-1); if (a->ls_event > b->ls_event) return (1); if (a->ls_lock < b->ls_lock) return (-1); if (a->ls_lock > b->ls_lock) return (1); return (b->ls_count - a->ls_count); } static int sitecmp_anylock(lsrec_t *a, lsrec_t *b) { int i; if (a->ls_event < b->ls_event) return (-1); if (a->ls_event > b->ls_event) return (1); for (i = g_stkdepth - 1; i >= 0; i--) { if (a->ls_stack[i] < b->ls_stack[i]) return (-1); if (a->ls_stack[i] > b->ls_stack[i]) return (1); } if (a->ls_caller < b->ls_caller) return (-1); if (a->ls_caller > b->ls_caller) return (1); return (0); } static int site_and_count_cmp_anylock(lsrec_t *a, lsrec_t *b) { int i; if (a->ls_event < b->ls_event) return (-1); if (a->ls_event > b->ls_event) return (1); for (i = g_stkdepth - 1; i >= 0; i--) { if (a->ls_stack[i] < b->ls_stack[i]) return (-1); if (a->ls_stack[i] > b->ls_stack[i]) return (1); } if (a->ls_caller < b->ls_caller) return (-1); if (a->ls_caller > b->ls_caller) return (1); return (b->ls_count - a->ls_count); } static void lsmergesort(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **a, lsrec_t **b, int n) { int m = n / 2; int i, j; if (m > 1) lsmergesort(cmp, a, b, m); if (n - m > 1) lsmergesort(cmp, a + m, b + m, n - m); for (i = m; i > 0; i--) b[i - 1] = a[i - 1]; for (j = m - 1; j < n - 1; j++) b[n + m - j - 2] = a[j + 1]; while (i < j) *a++ = cmp(b[i], b[j]) < 0 ? b[i++] : b[j--]; *a = b[i]; } static void coalesce(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **lock, int n) { int i, j; lsrec_t *target, *current; target = lock[0]; for (i = 1; i < n; i++) { current = lock[i]; if (cmp(current, target) != 0) { target = current; continue; } current->ls_event = LS_MAX_EVENTS; target->ls_count += current->ls_count; target->ls_refcnt += current->ls_refcnt; if (g_recsize < LS_TIME) continue; target->ls_time += current->ls_time; if (g_recsize < LS_HIST) continue; for (j = 0; j < 64; j++) target->ls_hist[j] += current->ls_hist[j]; } } static void coalesce_symbol(uintptr_t *addrp) { uintptr_t symoff; size_t symsize; if (addr_to_sym(*addrp, &symoff, &symsize) != NULL && symoff < symsize) *addrp -= symoff; } static void predicate_add(char **pred, char *what, char *cmp, uintptr_t value) { char *new; int len, newlen; if (what == NULL) return; if (*pred == NULL) { *pred = malloc(1); *pred[0] = '\0'; } len = strlen(*pred); newlen = len + strlen(what) + 32 + strlen("( && )"); new = malloc(newlen); if (*pred[0] != '\0') { if (cmp != NULL) { (void) sprintf(new, "(%s) && (%s %s 0x%p)", *pred, what, cmp, (void *)value); } else { (void) sprintf(new, "(%s) && (%s)", *pred, what); } } else { if (cmp != NULL) { (void) sprintf(new, "%s %s 0x%p", what, cmp, (void *)value); } else { (void) sprintf(new, "%s", what); } } free(*pred); *pred = new; } static void predicate_destroy(char **pred) { free(*pred); *pred = NULL; } static void filter_add(char **filt, char *what, uintptr_t base, uintptr_t size) { char buf[256], *c = buf, *new; int len, newlen; if (*filt == NULL) { *filt = malloc(1); *filt[0] = '\0'; } #ifdef illumos (void) sprintf(c, "%s(%s >= 0x%p && %s < 0x%p)", *filt[0] != '\0' ? " || " : "", what, (void *)base, what, (void *)(base + size)); #else (void) sprintf(c, "%s(%s >= %p && %s < %p)", *filt[0] != '\0' ? 
" || " : "", what, (void *)base, what, (void *)(base + size)); #endif newlen = (len = strlen(*filt) + 1) + strlen(c); new = malloc(newlen); bcopy(*filt, new, len); (void) strcat(new, c); free(*filt); *filt = new; } static void filter_destroy(char **filt) { free(*filt); *filt = NULL; } static void dprog_add(const char *fmt, ...) { va_list args; int size, offs; char c; va_start(args, fmt); size = vsnprintf(&c, 1, fmt, args) + 1; va_end(args); if (g_proglen == 0) { offs = 0; } else { offs = g_proglen - 1; } g_proglen = offs + size; if ((g_prog = realloc(g_prog, g_proglen)) == NULL) fail(1, "failed to reallocate program text"); va_start(args, fmt); (void) vsnprintf(&g_prog[offs], size, fmt, args); va_end(args); } /* * This function may read like an open sewer, but keep in mind that programs * that generate other programs are rarely pretty. If one has the unenviable * task of maintaining or -- worse -- extending this code, use the -V option * to examine the D program as generated by this function. */ static void dprog_addevent(int event) { ls_event_info_t *info = &g_event_info[event]; char *pred = NULL; char stack[20]; const char *arg0, *caller; char *arg1 = "arg1"; char buf[80]; hrtime_t dur; int depth; if (info->ev_name[0] == '\0') return; if (info->ev_type == 'I') { /* * For interrupt events, arg0 (normally the lock pointer) is * the CPU address plus the current pil, and arg1 (normally * the number of nanoseconds) is the number of nanoseconds * late -- and it's stored in arg2. */ #ifdef illumos arg0 = "(uintptr_t)curthread->t_cpu + \n" "\t curthread->t_cpu->cpu_profile_pil"; #else arg0 = "(uintptr_t)(curthread->td_oncpu << 16) + \n" "\t 0x01000000 + curthread->td_pri_class"; #endif caller = "(uintptr_t)arg0"; arg1 = "arg2"; } else { arg0 = "(uintptr_t)arg0"; caller = "caller"; } if (g_recsize > LS_HIST) { for (depth = 0; g_recsize > LS_STACK(depth); depth++) continue; if (g_tracing) { (void) sprintf(stack, "\tstack(%d);\n", depth); } else { (void) sprintf(stack, ", stack(%d)", depth); } } else { (void) sprintf(stack, ""); } if (info->ev_acquire != NULL) { /* * If this is a hold event, we need to generate an additional * clause for the acquire; the clause for the release will be * generated with the aggregating statement, below. */ dprog_add("%s\n", info->ev_acquire); predicate_add(&pred, info->ev_predicate, NULL, 0); predicate_add(&pred, g_predicate, NULL, 0); if (pred != NULL) dprog_add("/%s/\n", pred); dprog_add("{\n"); (void) sprintf(buf, "self->ev%d[(uintptr_t)arg0]", event); if (info->ev_type == 'H') { dprog_add("\t%s = timestamp;\n", buf); } else { /* * If this isn't a hold event, it's the recursive * error event. For this, we simply bump the * thread-local, per-lock count. */ dprog_add("\t%s++;\n", buf); } dprog_add("}\n\n"); predicate_destroy(&pred); pred = NULL; if (info->ev_type == 'E') { /* * If this is the recursive lock error event, we need * to generate an additional clause to decrement the * thread-local, per-lock count. This assures that we * only execute the aggregating clause if we have * recursive entry. 
*/ dprog_add("%s\n", info->ev_name); dprog_add("/%s/\n{\n\t%s--;\n}\n\n", buf, buf); } predicate_add(&pred, buf, NULL, 0); if (info->ev_type == 'H') { (void) sprintf(buf, "timestamp -\n\t " "self->ev%d[(uintptr_t)arg0]", event); } arg1 = buf; } else { predicate_add(&pred, info->ev_predicate, NULL, 0); if (info->ev_type != 'I') predicate_add(&pred, g_predicate, NULL, 0); else predicate_add(&pred, g_ipredicate, NULL, 0); } if ((dur = g_min_duration[event]) != 0) predicate_add(&pred, arg1, ">=", dur); dprog_add("%s\n", info->ev_name); if (pred != NULL) dprog_add("/%s/\n", pred); predicate_destroy(&pred); dprog_add("{\n"); if (g_tracing) { dprog_add("\ttrace(%dULL);\n", event); dprog_add("\ttrace(%s);\n", arg0); dprog_add("\ttrace(%s);\n", caller); dprog_add(stack); } else { /* * The ordering here is important: when we process the * aggregate, we count on the fact that @avg appears before * @hist in program order to assure that @avg is assigned the * first aggregation variable ID and @hist assigned the * second; see the comment in process_aggregate() for details. */ dprog_add("\t@avg[%dULL, %s, %s%s] = avg(%s);\n", event, arg0, caller, stack, arg1); if (g_recsize >= LS_HIST) { dprog_add("\t@hist[%dULL, %s, %s%s] = quantize" "(%s);\n", event, arg0, caller, stack, arg1); } } if (info->ev_acquire != NULL) dprog_add("\tself->ev%d[arg0] = 0;\n", event); dprog_add("}\n\n"); } static void dprog_compile() { dtrace_prog_t *prog; dtrace_proginfo_t info; if (g_Vflag) { (void) fprintf(stderr, "lockstat: vvvv D program vvvv\n"); (void) fputs(g_prog, stderr); (void) fprintf(stderr, "lockstat: ^^^^ D program ^^^^\n"); } if ((prog = dtrace_program_strcompile(g_dtp, g_prog, DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL) dfail("failed to compile program"); if (dtrace_program_exec(g_dtp, prog, &info) == -1) dfail("failed to enable probes"); if (dtrace_go(g_dtp) != 0) dfail("couldn't start tracing"); } static void #ifdef illumos status_fire(void) #else status_fire(int i) #endif {} static void status_init(void) { dtrace_optval_t val, status, agg; struct sigaction act; struct itimerspec ts; struct sigevent ev; timer_t tid; if (dtrace_getopt(g_dtp, "statusrate", &status) == -1) dfail("failed to get 'statusrate'"); if (dtrace_getopt(g_dtp, "aggrate", &agg) == -1) dfail("failed to get 'statusrate'"); /* * We would want to awaken at a rate that is the GCD of the statusrate * and the aggrate -- but that seems a bit absurd. Instead, we'll * simply awaken at a rate that is the more frequent of the two, which * assures that we're never later than the interval implied by the * more frequent rate. */ val = status < agg ? 
status : agg; (void) sigemptyset(&act.sa_mask); act.sa_flags = 0; act.sa_handler = status_fire; (void) sigaction(SIGUSR1, &act, NULL); ev.sigev_notify = SIGEV_SIGNAL; ev.sigev_signo = SIGUSR1; if (timer_create(CLOCK_REALTIME, &ev, &tid) == -1) dfail("cannot create CLOCK_REALTIME timer"); ts.it_value.tv_sec = val / NANOSEC; ts.it_value.tv_nsec = val % NANOSEC; ts.it_interval = ts.it_value; if (timer_settime(tid, TIMER_RELTIME, &ts, NULL) == -1) dfail("cannot set time on CLOCK_REALTIME timer"); } static void status_check(void) { if (!g_tracing && dtrace_aggregate_snap(g_dtp) != 0) dfail("failed to snap aggregate"); if (dtrace_status(g_dtp) == -1) dfail("dtrace_status()"); } static void lsrec_fill(lsrec_t *lsrec, const dtrace_recdesc_t *rec, int nrecs, caddr_t data) { bzero(lsrec, g_recsize); lsrec->ls_count = 1; if ((g_recsize > LS_HIST && nrecs < 4) || (nrecs < 3)) fail(0, "truncated DTrace record"); if (rec->dtrd_size != sizeof (uint64_t)) fail(0, "bad event size in first record"); /* LINTED - alignment */ lsrec->ls_event = (uint32_t)*((uint64_t *)(data + rec->dtrd_offset)); rec++; if (rec->dtrd_size != sizeof (uintptr_t)) fail(0, "bad lock address size in second record"); /* LINTED - alignment */ lsrec->ls_lock = *((uintptr_t *)(data + rec->dtrd_offset)); rec++; if (rec->dtrd_size != sizeof (uintptr_t)) fail(0, "bad caller size in third record"); /* LINTED - alignment */ lsrec->ls_caller = *((uintptr_t *)(data + rec->dtrd_offset)); rec++; if (g_recsize > LS_HIST) { int frames, i; pc_t *stack; frames = rec->dtrd_size / sizeof (pc_t); /* LINTED - alignment */ stack = (pc_t *)(data + rec->dtrd_offset); for (i = 1; i < frames; i++) lsrec->ls_stack[i - 1] = stack[i]; } } /*ARGSUSED*/ static int count_aggregate(const dtrace_aggdata_t *agg, void *arg) { *((size_t *)arg) += 1; return (DTRACE_AGGWALK_NEXT); } static int process_aggregate(const dtrace_aggdata_t *agg, void *arg) { const dtrace_aggdesc_t *aggdesc = agg->dtada_desc; caddr_t data = agg->dtada_data; lsdata_t *lsdata = arg; lsrec_t *lsrec = lsdata->lsd_next; const dtrace_recdesc_t *rec; uint64_t *avg, *quantized; int i, j; assert(lsdata->lsd_count < g_nrecs); /* * Aggregation variable IDs are guaranteed to be generated in program * order, and they are guaranteed to start from DTRACE_AGGVARIDNONE * plus one. As "avg" appears before "hist" in program order, we know * that "avg" will be allocated the first aggregation variable ID, and * "hist" will be allocated the second aggregation variable ID -- and * we therefore use the aggregation variable ID to differentiate the * cases. */ if (aggdesc->dtagd_varid > DTRACE_AGGVARIDNONE + 1) { /* * If this is the histogram entry. We'll copy the quantized * data into lc_hist, and jump over the rest. 
*/ rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1]; if (aggdesc->dtagd_varid != DTRACE_AGGVARIDNONE + 2) fail(0, "bad variable ID in aggregation record"); if (rec->dtrd_size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t)) fail(0, "bad quantize size in aggregation record"); /* LINTED - alignment */ quantized = (uint64_t *)(data + rec->dtrd_offset); for (i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0; i < DTRACE_QUANTIZE_NBUCKETS; i++, j++) lsrec->ls_hist[j] = quantized[i]; goto out; } lsrec_fill(lsrec, &aggdesc->dtagd_rec[1], aggdesc->dtagd_nrecs - 1, data); rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1]; if (rec->dtrd_size != 2 * sizeof (uint64_t)) fail(0, "bad avg size in aggregation record"); /* LINTED - alignment */ avg = (uint64_t *)(data + rec->dtrd_offset); lsrec->ls_count = (uint32_t)avg[0]; lsrec->ls_time = (uintptr_t)avg[1]; if (g_recsize >= LS_HIST) return (DTRACE_AGGWALK_NEXT); out: lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize); lsdata->lsd_count++; return (DTRACE_AGGWALK_NEXT); } static int process_trace(const dtrace_probedata_t *pdata, void *arg) { lsdata_t *lsdata = arg; lsrec_t *lsrec = lsdata->lsd_next; dtrace_eprobedesc_t *edesc = pdata->dtpda_edesc; caddr_t data = pdata->dtpda_data; if (lsdata->lsd_count >= g_nrecs) return (DTRACE_CONSUME_NEXT); lsrec_fill(lsrec, edesc->dtepd_rec, edesc->dtepd_nrecs, data); lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize); lsdata->lsd_count++; return (DTRACE_CONSUME_NEXT); } static int process_data(FILE *out, char *data) { lsdata_t lsdata; /* LINTED - alignment */ lsdata.lsd_next = (lsrec_t *)data; lsdata.lsd_count = 0; if (g_tracing) { if (dtrace_consume(g_dtp, out, process_trace, NULL, &lsdata) != 0) dfail("failed to consume buffer"); return (lsdata.lsd_count); } if (dtrace_aggregate_walk_keyvarsorted(g_dtp, process_aggregate, &lsdata) != 0) dfail("failed to walk aggregate"); return (lsdata.lsd_count); } /*ARGSUSED*/ static int drophandler(const dtrace_dropdata_t *data, void *arg) { g_dropped++; (void) fprintf(stderr, "lockstat: warning: %s", data->dtdda_msg); return (DTRACE_HANDLE_OK); } int main(int argc, char **argv) { char *data_buf; lsrec_t *lsp, **current, **first, **sort_buf, **merge_buf; FILE *out = stdout; int c; pid_t child; int status; int i, j; hrtime_t duration; char *addrp, *offp, *sizep, *evp, *lastp, *p; uintptr_t addr; size_t size, off; int events_specified = 0; int exec_errno = 0; uint32_t event; char *filt = NULL, *ifilt = NULL; static uint64_t ev_count[LS_MAX_EVENTS + 1]; static uint64_t ev_time[LS_MAX_EVENTS + 1]; dtrace_optval_t aggsize; char aggstr[10]; long ncpus; int dynvar = 0; int err; if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) { fail(0, "cannot open dtrace library: %s", dtrace_errmsg(NULL, err)); } if (dtrace_handle_drop(g_dtp, &drophandler, NULL) == -1) dfail("couldn't establish drop handler"); if (symtab_init() == -1) fail(1, "can't load kernel symbols"); g_nrecs = DEFAULT_NRECS; while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) { switch (c) { case 'b': g_recsize = LS_BASIC; break; case 't': g_recsize = LS_TIME; break; case 'h': g_recsize = LS_HIST; break; case 's': if (!isdigit(optarg[0])) usage(); g_stkdepth = atoi(optarg); if (g_stkdepth > LS_MAX_STACK_DEPTH) fail(0, "max stack depth is %d", LS_MAX_STACK_DEPTH); g_recsize = LS_STACK(g_stkdepth); break; case 'n': if (!isdigit(optarg[0])) usage(); g_nrecs = atoi(optarg); break; case 'd': if (!isdigit(optarg[0])) usage(); duration = atoll(optarg); /* * XXX -- durations really should be per event * 
since the units are different, but it's hard * to express this nicely in the interface. * Not clear yet what the cleanest solution is. */ for (i = 0; i < LS_MAX_EVENTS; i++) if (g_event_info[i].ev_type != 'E') g_min_duration[i] = duration; break; case 'i': if (!isdigit(optarg[0])) usage(); i = atoi(optarg); if (i <= 0) usage(); if (i > MAX_HZ) fail(0, "max interrupt rate is %d Hz", MAX_HZ); for (j = 0; j < LS_MAX_EVENTS; j++) if (strcmp(g_event_info[j].ev_desc, "Profiling interrupt") == 0) break; (void) sprintf(g_event_info[j].ev_name, "profile:::profile-%d", i); break; case 'l': case 'f': addrp = strtok(optarg, ","); sizep = strtok(NULL, ","); addrp = strtok(optarg, ",+"); offp = strtok(NULL, ","); size = sizep ? strtoul(sizep, NULL, 0) : 1; off = offp ? strtoul(offp, NULL, 0) : 0; if (addrp[0] == '0') { addr = strtoul(addrp, NULL, 16) + off; } else { addr = sym_to_addr(addrp) + off; if (sizep == NULL) size = sym_size(addrp) - off; if (addr - off == 0) fail(0, "symbol '%s' not found", addrp); if (size == 0) size = 1; } if (c == 'l') { filter_add(&filt, "arg0", addr, size); } else { filter_add(&filt, "caller", addr, size); filter_add(&ifilt, "arg0", addr, size); } break; case 'e': evp = strtok_r(optarg, ",", &lastp); while (evp) { int ev1, ev2; char *evp2; (void) strtok(evp, "-"); evp2 = strtok(NULL, "-"); ev1 = atoi(evp); ev2 = evp2 ? atoi(evp2) : ev1; if ((uint_t)ev1 >= LS_MAX_EVENTS || (uint_t)ev2 >= LS_MAX_EVENTS || ev1 > ev2) fail(0, "-e events out of range"); for (i = ev1; i <= ev2; i++) g_enabled[i] = 1; evp = strtok_r(NULL, ",", &lastp); } events_specified = 1; break; case 'c': g_cflag = 1; break; case 'k': g_kflag = 1; break; case 'w': g_wflag = 1; break; case 'W': g_Wflag = 1; break; case 'g': g_gflag = 1; break; case 'C': case 'E': case 'H': case 'I': for (i = 0; i < LS_MAX_EVENTS; i++) if (g_event_info[i].ev_type == c) g_enabled[i] = 1; events_specified = 1; break; case 'A': for (i = 0; i < LS_MAX_EVENTS; i++) if (strchr("CH", g_event_info[i].ev_type)) g_enabled[i] = 1; events_specified = 1; break; case 'T': g_tracing = 1; break; case 'D': if (!isdigit(optarg[0])) usage(); g_topn = atoi(optarg); break; case 'R': g_rates = 1; break; case 'p': g_pflag = 1; break; case 'P': g_Pflag = 1; break; case 'o': if ((out = fopen(optarg, "w")) == NULL) fail(1, "error opening file"); break; case 'V': g_Vflag = 1; break; default: if (strchr(LOCKSTAT_OPTSTR, c) == NULL) usage(); } } if (filt != NULL) { predicate_add(&g_predicate, filt, NULL, 0); filter_destroy(&filt); } if (ifilt != NULL) { predicate_add(&g_ipredicate, ifilt, NULL, 0); filter_destroy(&ifilt); } if (g_recsize == 0) { if (g_gflag) { g_stkdepth = LS_MAX_STACK_DEPTH; g_recsize = LS_STACK(g_stkdepth); } else { g_recsize = LS_TIME; } } if (g_gflag && g_recsize <= LS_STACK(0)) fail(0, "'-g' requires at least '-s 1' data gathering"); /* * Make sure the alignment is reasonable */ g_recsize = -(-g_recsize & -sizeof (uint64_t)); for (i = 0; i < LS_MAX_EVENTS; i++) { /* * If no events were specified, enable -C. */ if (!events_specified && g_event_info[i].ev_type == 'C') g_enabled[i] = 1; } for (i = 0; i < LS_MAX_EVENTS; i++) { if (!g_enabled[i]) continue; if (g_event_info[i].ev_acquire != NULL) { /* * If we've enabled a hold event, we must explicitly * allocate dynamic variable space. */ dynvar = 1; } dprog_addevent(i); } /* * Make sure there are remaining arguments to specify a child command * to execute. 
*/ if (argc <= optind) usage(); if ((ncpus = sysconf(_SC_NPROCESSORS_ONLN)) == -1) dfail("couldn't determine number of online CPUs"); /* * By default, we set our data buffer size to be the number of records * multiplied by the size of the record, doubled to account for some * DTrace slop and divided by the number of CPUs. We silently clamp * the aggregation size at both a minimum and a maximum to prevent * absurdly low or high values. */ if ((aggsize = (g_nrecs * g_recsize * 2) / ncpus) < MIN_AGGSIZE) aggsize = MIN_AGGSIZE; if (aggsize > MAX_AGGSIZE) aggsize = MAX_AGGSIZE; (void) sprintf(aggstr, "%lld", (long long)aggsize); if (!g_tracing) { if (dtrace_setopt(g_dtp, "bufsize", "4k") == -1) dfail("failed to set 'bufsize'"); if (dtrace_setopt(g_dtp, "aggsize", aggstr) == -1) dfail("failed to set 'aggsize'"); if (dynvar) { /* * If we're using dynamic variables, we set our * dynamic variable size to be one megabyte per CPU, * with a hard-limit of 32 megabytes. This may still * be too small in some cases, but it can be tuned * manually via -x if need be. */ (void) sprintf(aggstr, "%ldm", ncpus < 32 ? ncpus : 32); if (dtrace_setopt(g_dtp, "dynvarsize", aggstr) == -1) dfail("failed to set 'dynvarsize'"); } } else { if (dtrace_setopt(g_dtp, "bufsize", aggstr) == -1) dfail("failed to set 'bufsize'"); } if (dtrace_setopt(g_dtp, "statusrate", "10sec") == -1) dfail("failed to set 'statusrate'"); optind = 1; while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) { switch (c) { case 'x': if ((p = strchr(optarg, '=')) != NULL) *p++ = '\0'; if (dtrace_setopt(g_dtp, optarg, p) != 0) dfail("failed to set -x %s", optarg); break; } } argc -= optind; argv += optind; dprog_compile(); status_init(); g_elapsed = -gethrtime(); /* * Spawn the specified command and wait for it to complete. */ child = fork(); if (child == -1) fail(1, "cannot fork"); if (child == 0) { (void) dtrace_close(g_dtp); (void) execvp(argv[0], &argv[0]); exec_errno = errno; exit(127); } #ifdef illumos while (waitpid(child, &status, WEXITED) != child) #else while (waitpid(child, &status, 0) != child) #endif status_check(); g_elapsed += gethrtime(); if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 0) { if (exec_errno != 0) { errno = exec_errno; fail(1, "could not execute %s", argv[0]); } (void) fprintf(stderr, "lockstat: warning: %s exited with code %d\n", argv[0], WEXITSTATUS(status)); } } else { (void) fprintf(stderr, "lockstat: warning: %s died on signal %d\n", argv[0], WTERMSIG(status)); } if (dtrace_stop(g_dtp) == -1) dfail("failed to stop dtrace"); /* * Before we read out the results, we need to allocate our buffer. * If we're tracing, then we'll just use the precalculated size. If * we're not, then we'll take a snapshot of the aggregate, and walk * it to count the number of records. */ if (!g_tracing) { if (dtrace_aggregate_snap(g_dtp) != 0) dfail("failed to snap aggregate"); g_nrecs = 0; if (dtrace_aggregate_walk(g_dtp, count_aggregate, &g_nrecs) != 0) dfail("failed to walk aggregate"); } #ifdef illumos if ((data_buf = memalign(sizeof (uint64_t), (g_nrecs + 1) * g_recsize)) == NULL) #else if (posix_memalign((void **)&data_buf, sizeof (uint64_t), (g_nrecs + 1) * g_recsize) ) #endif fail(1, "Memory allocation failed"); /* * Read out the DTrace data. 
*/ g_nrecs_used = process_data(out, data_buf); if (g_nrecs_used > g_nrecs || g_dropped) (void) fprintf(stderr, "lockstat: warning: " "ran out of data records (use -n for more)\n"); /* LINTED - alignment */ for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++, /* LINTED - alignment */ lsp = (lsrec_t *)((char *)lsp + g_recsize)) { ev_count[lsp->ls_event] += lsp->ls_count; ev_time[lsp->ls_event] += lsp->ls_time; } /* * If -g was specified, convert stacks into individual records. */ if (g_gflag) { lsrec_t *newlsp, *oldlsp; #ifdef illumos newlsp = memalign(sizeof (uint64_t), g_nrecs_used * LS_TIME * (g_stkdepth + 1)); #else posix_memalign((void **)&newlsp, sizeof (uint64_t), g_nrecs_used * LS_TIME * (g_stkdepth + 1)); #endif if (newlsp == NULL) fail(1, "Cannot allocate space for -g processing"); lsp = newlsp; /* LINTED - alignment */ for (i = 0, oldlsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++, /* LINTED - alignment */ oldlsp = (lsrec_t *)((char *)oldlsp + g_recsize)) { int fr; int caller_in_stack = 0; if (oldlsp->ls_count == 0) continue; for (fr = 0; fr < g_stkdepth; fr++) { if (oldlsp->ls_stack[fr] == 0) break; if (oldlsp->ls_stack[fr] == oldlsp->ls_caller) caller_in_stack = 1; bcopy(oldlsp, lsp, LS_TIME); lsp->ls_caller = oldlsp->ls_stack[fr]; /* LINTED - alignment */ lsp = (lsrec_t *)((char *)lsp + LS_TIME); } if (!caller_in_stack) { bcopy(oldlsp, lsp, LS_TIME); /* LINTED - alignment */ lsp = (lsrec_t *)((char *)lsp + LS_TIME); } } g_nrecs = g_nrecs_used = ((uintptr_t)lsp - (uintptr_t)newlsp) / LS_TIME; g_recsize = LS_TIME; g_stkdepth = 0; free(data_buf); data_buf = (char *)newlsp; } if ((sort_buf = calloc(2 * (g_nrecs + 1), sizeof (void *))) == NULL) fail(1, "Sort buffer allocation failed"); merge_buf = sort_buf + (g_nrecs + 1); /* * Build the sort buffer, discarding zero-count records along the way. */ /* LINTED - alignment */ for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++, /* LINTED - alignment */ lsp = (lsrec_t *)((char *)lsp + g_recsize)) { if (lsp->ls_count == 0) lsp->ls_event = LS_MAX_EVENTS; sort_buf[i] = lsp; } if (g_nrecs_used == 0) exit(0); /* * Add a sentinel after the last record */ sort_buf[i] = lsp; lsp->ls_event = LS_MAX_EVENTS; if (g_tracing) { report_trace(out, sort_buf); return (0); } /* * Application of -g may have resulted in multiple records * with the same signature; coalesce them. */ if (g_gflag) { mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used); coalesce(lockcmp, sort_buf, g_nrecs_used); } /* * Coalesce locks within the same symbol if -c option specified. * Coalesce PCs within the same function if -k option specified. */ if (g_cflag || g_kflag) { for (i = 0; i < g_nrecs_used; i++) { int fr; lsp = sort_buf[i]; if (g_cflag) coalesce_symbol(&lsp->ls_lock); if (g_kflag) { for (fr = 0; fr < g_stkdepth; fr++) coalesce_symbol(&lsp->ls_stack[fr]); coalesce_symbol(&lsp->ls_caller); } } mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used); coalesce(lockcmp, sort_buf, g_nrecs_used); } /* * Coalesce callers if -w option specified */ if (g_wflag) { mergesort(lock_and_count_cmp_anywhere, sort_buf, merge_buf, g_nrecs_used); coalesce(lockcmp_anywhere, sort_buf, g_nrecs_used); } /* * Coalesce locks if -W option specified */ if (g_Wflag) { mergesort(site_and_count_cmp_anylock, sort_buf, merge_buf, g_nrecs_used); coalesce(sitecmp_anylock, sort_buf, g_nrecs_used); } /* * Sort data by contention count (ls_count) or total time (ls_time), * depending on g_Pflag. Override g_Pflag if time wasn't measured. 
*/ if (g_recsize < LS_TIME) g_Pflag = 0; if (g_Pflag) mergesort(timecmp, sort_buf, merge_buf, g_nrecs_used); else mergesort(countcmp, sort_buf, merge_buf, g_nrecs_used); /* * Display data by event type */ first = &sort_buf[0]; while ((event = (*first)->ls_event) < LS_MAX_EVENTS) { current = first; while ((lsp = *current)->ls_event == event) current++; report_stats(out, first, current - first, ev_count[event], ev_time[event]); first = current; } return (0); } static char * format_symbol(char *buf, uintptr_t addr, int show_size) { uintptr_t symoff; char *symname; size_t symsize; symname = addr_to_sym(addr, &symoff, &symsize); if (show_size && symoff == 0) (void) sprintf(buf, "%s[%ld]", symname, (long)symsize); else if (symoff == 0) (void) sprintf(buf, "%s", symname); else if (symoff < 16 && bcmp(symname, "cpu[", 4) == 0) /* CPU+PIL */ #ifdef illumos (void) sprintf(buf, "%s+%ld", symname, (long)symoff); #else (void) sprintf(buf, "%s+%s", symname, g_pri_class[(int)symoff]); #endif else if (symoff <= symsize || (symoff < 256 && addr != symoff)) (void) sprintf(buf, "%s+0x%llx", symname, (unsigned long long)symoff); else (void) sprintf(buf, "0x%llx", (unsigned long long)addr); return (buf); } static void report_stats(FILE *out, lsrec_t **sort_buf, size_t nrecs, uint64_t total_count, uint64_t total_time) { uint32_t event = sort_buf[0]->ls_event; lsrec_t *lsp; double ptotal = 0.0; double percent; int i, j, fr; int displayed; int first_bin, last_bin, max_bin_count, total_bin_count; int rectype; char buf[256]; char lhdr[80], chdr[80]; rectype = g_recsize; if (g_topn == 0) { (void) fprintf(out, "%20llu %s\n", g_rates == 0 ? total_count : ((unsigned long long)total_count * NANOSEC) / g_elapsed, g_event_info[event].ev_desc); return; } (void) sprintf(lhdr, "%s%s", g_Wflag ? "Hottest " : "", g_event_info[event].ev_lhdr); (void) sprintf(chdr, "%s%s", g_wflag ? "Hottest " : "", "Caller"); if (!g_pflag) (void) fprintf(out, "\n%s: %.0f events in %.3f seconds (%.0f events/sec)\n\n", g_event_info[event].ev_desc, (double)total_count, (double)g_elapsed / NANOSEC, (double)total_count * NANOSEC / g_elapsed); if (!g_pflag && rectype < LS_HIST) { (void) sprintf(buf, "%s", g_event_info[event].ev_units); (void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n", g_rates ? "ops/s" : "Count", g_gflag ? "genr" : "indv", "cuml", "rcnt", rectype >= LS_TIME ? buf : "", lhdr, chdr); (void) fprintf(out, "---------------------------------" "----------------------------------------------\n"); } displayed = 0; for (i = 0; i < nrecs; i++) { lsp = sort_buf[i]; if (displayed++ >= g_topn) break; if (g_pflag) { int j; (void) fprintf(out, "%u %u", lsp->ls_event, lsp->ls_count); (void) fprintf(out, " %s", format_symbol(buf, lsp->ls_lock, g_cflag)); (void) fprintf(out, " %s", format_symbol(buf, lsp->ls_caller, 0)); (void) fprintf(out, " %f", (double)lsp->ls_refcnt / lsp->ls_count); if (rectype >= LS_TIME) (void) fprintf(out, " %llu", (unsigned long long)lsp->ls_time); if (rectype >= LS_HIST) { for (j = 0; j < 64; j++) (void) fprintf(out, " %u", lsp->ls_hist[j]); } for (j = 0; j < LS_MAX_STACK_DEPTH; j++) { if (rectype <= LS_STACK(j) || lsp->ls_stack[j] == 0) break; (void) fprintf(out, " %s", format_symbol(buf, lsp->ls_stack[j], 0)); } (void) fprintf(out, "\n"); continue; } if (rectype >= LS_HIST) { (void) fprintf(out, "---------------------------------" "----------------------------------------------\n"); (void) sprintf(buf, "%s", g_event_info[event].ev_units); (void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n", g_rates ? 
"ops/s" : "Count", g_gflag ? "genr" : "indv", "cuml", "rcnt", buf, lhdr, chdr); } if (g_Pflag && total_time != 0) percent = (lsp->ls_time * 100.00) / total_time; else percent = (lsp->ls_count * 100.00) / total_count; ptotal += percent; if (rectype >= LS_TIME) (void) sprintf(buf, "%llu", (unsigned long long)(lsp->ls_time / lsp->ls_count)); else buf[0] = '\0'; (void) fprintf(out, "%5llu ", g_rates == 0 ? lsp->ls_count : ((uint64_t)lsp->ls_count * NANOSEC) / g_elapsed); (void) fprintf(out, "%3.0f%% ", percent); if (g_gflag) (void) fprintf(out, "---- "); else (void) fprintf(out, "%3.0f%% ", ptotal); (void) fprintf(out, "%4.2f %8s ", (double)lsp->ls_refcnt / lsp->ls_count, buf); (void) fprintf(out, "%-22s ", format_symbol(buf, lsp->ls_lock, g_cflag)); (void) fprintf(out, "%-24s\n", format_symbol(buf, lsp->ls_caller, 0)); if (rectype < LS_HIST) continue; (void) fprintf(out, "\n"); (void) fprintf(out, "%10s %31s %-9s %-24s\n", g_event_info[event].ev_units, "------ Time Distribution ------", g_rates ? "ops/s" : "count", rectype > LS_STACK(0) ? "Stack" : ""); first_bin = 0; while (lsp->ls_hist[first_bin] == 0) first_bin++; last_bin = 63; while (lsp->ls_hist[last_bin] == 0) last_bin--; max_bin_count = 0; total_bin_count = 0; for (j = first_bin; j <= last_bin; j++) { total_bin_count += lsp->ls_hist[j]; if (lsp->ls_hist[j] > max_bin_count) max_bin_count = lsp->ls_hist[j]; } /* * If we went a few frames below the caller, ignore them */ for (fr = 3; fr > 0; fr--) if (lsp->ls_stack[fr] == lsp->ls_caller) break; for (j = first_bin; j <= last_bin; j++) { uint_t depth = (lsp->ls_hist[j] * 30) / total_bin_count; (void) fprintf(out, "%10llu |%s%s %-9u ", 1ULL << j, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 30 - depth, " " + depth, g_rates == 0 ? lsp->ls_hist[j] : (uint_t)(((uint64_t)lsp->ls_hist[j] * NANOSEC) / g_elapsed)); if (rectype <= LS_STACK(fr) || lsp->ls_stack[fr] == 0) { (void) fprintf(out, "\n"); continue; } (void) fprintf(out, "%-24s\n", format_symbol(buf, lsp->ls_stack[fr], 0)); fr++; } while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) { (void) fprintf(out, "%15s %-36s %-24s\n", "", "", format_symbol(buf, lsp->ls_stack[fr], 0)); fr++; } } if (!g_pflag) (void) fprintf(out, "---------------------------------" "----------------------------------------------\n"); (void) fflush(out); } static void report_trace(FILE *out, lsrec_t **sort_buf) { lsrec_t *lsp; int i, fr; int rectype; char buf[256], buf2[256]; rectype = g_recsize; if (!g_pflag) { (void) fprintf(out, "%5s %7s %11s %-24s %-24s\n", "Event", "Time", "Owner", "Lock", "Caller"); (void) fprintf(out, "---------------------------------" "----------------------------------------------\n"); } for (i = 0; i < g_nrecs_used; i++) { lsp = sort_buf[i]; if (lsp->ls_event >= LS_MAX_EVENTS || lsp->ls_count == 0) continue; (void) fprintf(out, "%2d %10llu %11p %-24s %-24s\n", lsp->ls_event, (unsigned long long)lsp->ls_time, (void *)lsp->ls_next, format_symbol(buf, lsp->ls_lock, 0), format_symbol(buf2, lsp->ls_caller, 0)); if (rectype <= LS_STACK(0)) continue; /* * If we went a few frames below the caller, ignore them */ for (fr = 3; fr > 0; fr--) if (lsp->ls_stack[fr] == lsp->ls_caller) break; while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) { (void) fprintf(out, "%53s %-24s\n", "", format_symbol(buf, lsp->ls_stack[fr], 0)); fr++; } (void) fprintf(out, "\n"); } (void) fflush(out); } Index: head/sys/kern/kern_lockstat.c =================================================================== --- head/sys/kern/kern_lockstat.c (revision 285703) +++ 
head/sys/kern/kern_lockstat.c (revision 285704) @@ -1,81 +1,81 @@ /*- * Copyright 2008-2009 Stacey Son * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include SDT_PROVIDER_DEFINE(lockstat); SDT_PROBE_DEFINE1(lockstat, , , adaptive__acquire, "struct mtx *"); SDT_PROBE_DEFINE1(lockstat, , , adaptive__release, "struct mtx *"); SDT_PROBE_DEFINE2(lockstat, , , adaptive__spin, "struct mtx *", "uint64_t"); SDT_PROBE_DEFINE2(lockstat, , , adaptive__block, "struct mtx *", "uint64_t"); SDT_PROBE_DEFINE1(lockstat, , , spin__acquire, "struct mtx *"); SDT_PROBE_DEFINE1(lockstat, , , spin__release, "struct mtx *"); SDT_PROBE_DEFINE2(lockstat, , , spin__spin, "struct mtx *", "uint64_t"); -SDT_PROBE_DEFINE1(lockstat, , , rw__acquire, "struct rwlock *"); -SDT_PROBE_DEFINE1(lockstat, , , rw__release, "struct rwlock *"); +SDT_PROBE_DEFINE2(lockstat, , , rw__acquire, "struct rwlock *", "int"); +SDT_PROBE_DEFINE2(lockstat, , , rw__release, "struct rwlock *", "int"); SDT_PROBE_DEFINE5(lockstat, , , rw__block, "struct rwlock *", "uint64_t", "int", "int", "int"); SDT_PROBE_DEFINE2(lockstat, , , rw__spin, "struct rwlock *", "uint64_t"); SDT_PROBE_DEFINE1(lockstat, , , rw__upgrade, "struct rwlock *"); SDT_PROBE_DEFINE1(lockstat, , , rw__downgrade, "struct rwlock *"); -SDT_PROBE_DEFINE1(lockstat, , , sx__acquire, "struct sx *"); -SDT_PROBE_DEFINE1(lockstat, , , sx__release, "struct sx *"); +SDT_PROBE_DEFINE2(lockstat, , , sx__acquire, "struct sx *", "int"); +SDT_PROBE_DEFINE2(lockstat, , , sx__release, "struct sx *", "int"); SDT_PROBE_DEFINE5(lockstat, , , sx__block, "struct sx *", "uint64_t", "int", "int", "int"); SDT_PROBE_DEFINE2(lockstat, , , sx__spin, "struct sx *", "uint64_t"); SDT_PROBE_DEFINE1(lockstat, , , sx__upgrade, "struct sx *"); SDT_PROBE_DEFINE1(lockstat, , , sx__downgrade, "struct sx *"); SDT_PROBE_DEFINE2(lockstat, , , thread__spin, "struct mtx *", "uint64_t"); int lockstat_enabled = 0; uint64_t lockstat_nsecs(struct lock_object *lo) { struct bintime bt; uint64_t ns; if (!lockstat_enabled) return (0); if ((lo->lo_flags & LO_NOPROFILE) != 0) return (0); binuptime(&bt); ns = bt.sec * (uint64_t)1000000000; ns += ((uint64_t)1000000000 * (uint32_t)(bt.frac >> 32)) >> 32; return (ns); } Index: head/sys/kern/kern_rwlock.c 
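
lockstat_nsecs() above converts a struct bintime to nanoseconds with fixed-point arithmetic: the whole seconds are scaled by 10^9, and the top 32 bits of the 64-bit binary fraction are multiplied by 10^9 and shifted back down by 32. A minimal sketch of just the fractional conversion follows, with a plain uint64_t standing in for bt.frac; a fraction of 2^63 (exactly half a second) comes out as 500000000 ns.

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the fractional part of lockstat_nsecs(): "frac" stands in
 * for bt.frac, a 64-bit binary fraction of a second.
 */
static uint64_t
frac_to_nsecs(uint64_t frac)
{
	return (((uint64_t)1000000000 * (uint32_t)(frac >> 32)) >> 32);
}

int
main(void)
{
	/* 2^63 is exactly half a second; expect 500000000. */
	(void) printf("%ju\n", (uintmax_t)frac_to_nsecs(1ULL << 63));
	return (0);
}
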
=================================================================== --- head/sys/kern/kern_rwlock.c (revision 285703) +++ head/sys/kern/kern_rwlock.c (revision 285704) @@ -1,1274 +1,1274 @@ /*- * Copyright (c) 2006 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Machine independent bits of reader/writer lock implementation. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_no_adaptive_rwlocks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS) #define ADAPTIVE_RWLOCKS #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* * Return the rwlock address when the lock cookie address is provided. * This functionality assumes that struct rwlock* have a member named rw_lock. */ #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock)) #ifdef ADAPTIVE_RWLOCKS static int rowner_retries = 10; static int rowner_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, "rwlock debugging"); SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); #endif #ifdef DDB #include static void db_show_rwlock(const struct lock_object *lock); #endif static void assert_rw(const struct lock_object *lock, int what); static void lock_rw(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_rw(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_rw(struct lock_object *lock); struct lock_class lock_class_rw = { .lc_name = "rw", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_rw, #ifdef DDB .lc_ddb_show = db_show_rwlock, #endif .lc_lock = lock_rw, .lc_unlock = unlock_rw, #ifdef KDTRACE_HOOKS .lc_owner = owner_rw, #endif }; /* * Return a pointer to the owning thread if the lock is write-locked or * NULL if the lock is unlocked or read-locked. */ #define rw_wowner(rw) \ ((rw)->rw_lock & RW_LOCK_READ ? NULL : \ (struct thread *)RW_OWNER((rw)->rw_lock)) /* * Returns if a write owner is recursed. Write ownership is not assured * here and should be previously checked. 
*/ #define rw_recursed(rw) ((rw)->rw_recurse != 0) /* * Return true if curthread helds the lock. */ #define rw_wlocked(rw) (rw_wowner((rw)) == curthread) /* * Return a pointer to the owning thread for this lock who should receive * any priority lent by threads that block on this lock. Currently this * is identical to rw_wowner(). */ #define rw_owner(rw) rw_wowner(rw) #ifndef INVARIANTS #define __rw_assert(c, what, file, line) #endif void assert_rw(const struct lock_object *lock, int what) { rw_assert((const struct rwlock *)lock, what); } void lock_rw(struct lock_object *lock, uintptr_t how) { struct rwlock *rw; rw = (struct rwlock *)lock; if (how) rw_rlock(rw); else rw_wlock(rw); } uintptr_t unlock_rw(struct lock_object *lock) { struct rwlock *rw; rw = (struct rwlock *)lock; rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); if (rw->rw_lock & RW_LOCK_READ) { rw_runlock(rw); return (1); } else { rw_wunlock(rw); return (0); } } #ifdef KDTRACE_HOOKS int owner_rw(const struct lock_object *lock, struct thread **owner) { const struct rwlock *rw = (const struct rwlock *)lock; uintptr_t x = rw->rw_lock; *owner = rw_wowner(rw); return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) : (*owner != NULL)); } #endif void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts) { struct rwlock *rw; int flags; rw = rwlock2rw(c); MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET | RW_RECURSE | RW_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock, ("%s: rw_lock not aligned for %s: %p", __func__, name, &rw->rw_lock)); flags = LO_UPGRADABLE; if (opts & RW_DUPOK) flags |= LO_DUPOK; if (opts & RW_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & RW_NOWITNESS)) flags |= LO_WITNESS; if (opts & RW_RECURSE) flags |= LO_RECURSABLE; if (opts & RW_QUIET) flags |= LO_QUIET; if (opts & RW_NEW) flags |= LO_NEW; lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags); rw->rw_lock = RW_UNLOCKED; rw->rw_recurse = 0; } void _rw_destroy(volatile uintptr_t *c) { struct rwlock *rw; rw = rwlock2rw(c); KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw)); KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw)); rw->rw_lock = RW_DESTROYED; lock_destroy(&rw->lock_object); } void rw_sysinit(void *arg) { struct rw_args *args = arg; rw_init((struct rwlock *)args->ra_rw, args->ra_desc); } void rw_sysinit_flags(void *arg) { struct rw_args_flags *args = arg; rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc, args->ra_flags); } int _rw_wowned(const volatile uintptr_t *c) { return (rw_wowner(rwlock2rw(c)) == curthread); } void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wlock() of destroyed rwlock @ %s:%d", file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __rw_wlock(rw, curthread, file, line); LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); curthread->td_locks++; } int __rw_try_wlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; int rval; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, 
rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line)); if (rw_wlocked(rw) && (rw->lock_object.lo_flags & LO_RECURSABLE) != 0) { rw->rw_recurse++; rval = 1; } else rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED, (uintptr_t)curthread); LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); if (!rw_recursed(rw)) - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(rw__acquire, - rw, 0, 0, file, line); + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, + rw, 0, 0, file, line, LOCKSTAT_WRITER); curthread->td_locks++; } return (rval); } void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); __rw_wunlock(rw, curthread, file, line); curthread->td_locks--; } /* * Determines whether a new reader can acquire a lock. Succeeds if the * reader already owns a read lock and the lock is locked for read to * prevent deadlock from reader recursion. Also succeeds if the lock * is unlocked and has no writer waiters or spinners. Failing otherwise * prioritizes writers before readers. */ #define RW_CAN_READ(_rw) \ ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) & \ (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) == \ RW_LOCK_READ) void __rw_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; int spintries = 0; int i; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif uintptr_t v; #ifdef KDTRACE_HOOKS uintptr_t state; uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_rlock() of destroyed rwlock @ %s:%d", file, line)); KASSERT(rw_wowner(rw) != curthread, ("rw_rlock: wlock already held for %s @ %s:%d", rw->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&rw->lock_object); state = rw->rw_lock; #endif for (;;) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif /* * Handle the easy case. If no other thread has a write * lock, then try to bump up the count of read locks. Note * that we have to preserve the current state of the * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a * read lock, then rw_lock must have changed, so restart * the loop. Note that this handles the case of a * completely unlocked rwlock since such a lock is encoded * as a read lock with no waiters. */ v = rw->rw_lock; if (RW_CAN_READ(v)) { /* * The RW_LOCK_READ_WAITERS flag should only be set * if the lock has been unlocked and write waiters * were present. 
*/ if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, v + RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, rw, (void *)v, (void *)(v + RW_ONE_READER)); break; } continue; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } else if (spintries < rowner_retries) { spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); for (i = 0; i < rowner_loops; i++) { v = rw->rw_lock; if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v)) break; cpu_spinwait(); } #ifdef KDTRACE_HOOKS spin_cnt += rowner_loops - i; #endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i != rowner_loops) continue; } #endif /* * Okay, now it's the hard case. Some other thread already * has a write lock or there are write waiters present, * acquire the turnstile lock so we can begin the process * of blocking. */ ts = turnstile_trywait(&rw->lock_object); /* * The lock might have been released while we spun, so * recheck its state and restart the loop if needed. */ v = rw->rw_lock; if (RW_CAN_READ(v)) { turnstile_cancel(ts); continue; } #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * The lock is held in write mode or it already has waiters. */ MPASS(!RW_CAN_READ(v)); /* * If the RW_LOCK_READ_WAITERS flag is already set, then * we can go ahead and block. If it is not set then try * to set it. If we fail to set it drop the turnstile * lock and restart the loop. */ if (!(v & RW_LOCK_READ_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_READ_WAITERS)) { turnstile_cancel(ts); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set read waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the read waiters * flag is set, so we must block on the turnstile. 
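The read fast path at the top of this hunk bumps the reader count with a single compare-and-swap that carries the waiter flag bits over unchanged. Below is a minimal C11 model of that idiom, with userland atomics standing in for atomic_cmpset_acq_ptr(); it is a sketch under assumed names (M_*, model_rlock_fast) and deliberately omits the RW_CAN_READ() check and the contended slow path:

	#include <assert.h>
	#include <stdatomic.h>
	#include <stdint.h>

	#define	M_RW_LOCK_READ	0x01u
	#define	M_RW_ONE_READER	(1u << 4)	/* reader count lives above the flag bits */

	/*
	 * Re-read the word and retry the CAS until either a writer owns the
	 * lock (caller must take the slow path) or the reader count is bumped
	 * with the low flag bits carried over intact.
	 */
	static int
	model_rlock_fast(_Atomic uintptr_t *word)
	{
		uintptr_t v;

		for (;;) {
			v = atomic_load_explicit(word, memory_order_relaxed);
			if ((v & M_RW_LOCK_READ) == 0)
				return (0);		/* write-locked: slow path */
			if (atomic_compare_exchange_weak_explicit(word, &v,
			    v + M_RW_ONE_READER, memory_order_acquire,
			    memory_order_relaxed))
				return (1);		/* one more reader recorded */
		}
	}

	int
	main(void)
	{
		_Atomic uintptr_t word = M_RW_LOCK_READ;	/* unlocked */

		assert(model_rlock_fast(&word));
		assert(atomic_load(&word) == (M_RW_LOCK_READ | M_RW_ONE_READER));
		return (0);
	}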
*/ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); #endif /* * TODO: acquire "owner of record" here. Here be turnstile dragons * however. turnstiles don't like owners changing between calls to * turnstile_wait() currently. */ - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(rw__acquire, rw, contested, - waittime, file, line); + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, + waittime, file, line, LOCKSTAT_READER); LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); WITNESS_LOCK(&rw->lock_object, 0, file, line); curthread->td_locks++; curthread->td_rw_rlocks++; } int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t x; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); for (;;) { x = rw->rw_lock; KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line)); if (!(x & RW_LOCK_READ)) break; if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) { LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file, line); WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(rw__acquire, - rw, 0, 0, file, line); + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, + rw, 0, 0, file, line, LOCKSTAT_READER); curthread->td_locks++; curthread->td_rw_rlocks++; return (1); } } LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line); return (0); } void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t x, v, queue; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_runlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_RLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, 0, file, line); LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line); /* TODO: drop "owner of record" here. */ for (;;) { /* * See if there is more than one read lock held. If so, * just drop one and return. */ x = rw->rw_lock; if (RW_READERS(x) > 1) { if (atomic_cmpset_rel_ptr(&rw->rw_lock, x, x - RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, rw, (void *)x, (void *)(x - RW_ONE_READER)); break; } continue; } /* * If there aren't any waiters for a write lock, then try * to drop it quickly. 
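The KDTRACE_HOOKS bookkeeping above uses a subtract-now/add-now idiom: each interval is subtracted from its accumulator when it begins and added back when it ends, so no separate start-timestamp variables are needed, and pure spin time falls out as all_time - sleep_time. The standalone sketch below uses hypothetical helper names, with clock_gettime() standing in for lockstat_nsecs(), to show the same arithmetic:

	#include <stdint.h>
	#include <stdio.h>
	#include <time.h>

	/* Rough stand-in for lockstat_nsecs(): monotonic time in nanoseconds. */
	static int64_t
	now_nsecs(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return ((int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
	}

	static void
	sleep_ms(long ms)
	{
		struct timespec ts = { 0, ms * 1000000L };

		nanosleep(&ts, NULL);
	}

	int
	main(void)
	{
		int64_t all_time = 0, sleep_time = 0;

		all_time -= now_nsecs();	/* acquisition attempt starts */

		sleep_time -= now_nsecs();	/* about to block */
		sleep_ms(2);			/* pretend we slept on a turnstile */
		sleep_time += now_nsecs();	/* woken up */

		sleep_ms(1);			/* pretend we spun for a while */
		all_time += now_nsecs();	/* lock finally acquired */

		/* The same split the kernel feeds to the block and spin probes. */
		printf("blocked %lld ns, spun/other %lld ns\n",
		    (long long)sleep_time, (long long)(all_time - sleep_time));
		return (0);
	}

The reader/writer constant that this change threads through LOCKSTAT_RECORD4() and the new acquisition macro (LOCKSTAT_READER here, LOCKSTAT_WRITER on the exclusive paths) is what lets a consumer split these times by lock mode.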
*/ if (!(x & RW_LOCK_WAITERS)) { MPASS((x & ~RW_LOCK_WRITE_SPINNER) == RW_READERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&rw->rw_lock, x, RW_UNLOCKED)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded", __func__, rw); break; } continue; } /* * Ok, we know we have waiters and we think we are the * last reader, so grab the turnstile lock. */ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); MPASS(v & RW_LOCK_WAITERS); /* * Try to drop our lock leaving the lock in a unlocked * state. * * If you wanted to do explicit lock handoff you'd have to * do it here. You'd also want to use turnstile_signal() * and you'd have to handle the race where a higher * priority thread blocks on the write lock before the * thread you wakeup actually runs and have the new thread * "steal" the lock. For now it's a lot simpler to just * wakeup all of the waiters. * * As above, if we fail, then another thread might have * acquired a read lock, so drop the turnstile lock and * restart. */ x = RW_UNLOCKED; if (v & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; x |= (v & RW_LOCK_READ_WAITERS); } else queue = TS_SHARED_QUEUE; if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v, x)) { turnstile_chain_unlock(&rw->lock_object); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", __func__, rw); /* * Ok. The lock is released and all that's left is to * wake up the waiters. Note that the lock might not be * free anymore, but in that case the writers will just * block again if they run before the new lock holder(s) * release the lock. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); turnstile_broadcast(ts, queue); turnstile_unpend(ts, TS_SHARED_LOCK); turnstile_chain_unlock(&rw->lock_object); break; } - LOCKSTAT_PROFILE_RELEASE_LOCK(rw__release, rw); + LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER); curthread->td_locks--; curthread->td_rw_rlocks--; } /* * This function is called when we are unable to obtain a write lock on the * first try. This means that at least one other thread holds either a * read or write lock. 
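When the last reader leaves and waiters exist, the code above builds the new lock word and picks a turnstile queue in one step: wake the exclusive queue if writers are waiting and carry the read-waiters bit over, otherwise wake the shared queue and leave the lock fully unlocked. A userland model of just that computation (illustrative names; the MODEL_* queue identifiers stand in for TS_EXCLUSIVE_QUEUE/TS_SHARED_QUEUE):

	#include <assert.h>
	#include <stdint.h>

	#define	M_RW_LOCK_READ		0x01
	#define	M_RW_LOCK_READ_WAITERS	0x02
	#define	M_RW_LOCK_WRITE_WAITERS	0x04
	#define	M_RW_UNLOCKED		M_RW_LOCK_READ	/* RW_READERS_LOCK(0) */

	enum model_queue { MODEL_SHARED_QUEUE, MODEL_EXCLUSIVE_QUEUE };

	/*
	 * If writers are waiting, wake the exclusive queue and keep the
	 * read-waiters bit in the new word so those readers are not lost;
	 * otherwise wake the shared queue and release the lock completely.
	 */
	static uintptr_t
	last_reader_release(uintptr_t waiter_bits, enum model_queue *queuep)
	{
		uintptr_t x;

		x = M_RW_UNLOCKED;
		if (waiter_bits & M_RW_LOCK_WRITE_WAITERS) {
			*queuep = MODEL_EXCLUSIVE_QUEUE;
			x |= (waiter_bits & M_RW_LOCK_READ_WAITERS);
		} else
			*queuep = MODEL_SHARED_QUEUE;
		return (x);
	}

	int
	main(void)
	{
		enum model_queue q;
		uintptr_t x;

		x = last_reader_release(M_RW_LOCK_READ_WAITERS |
		    M_RW_LOCK_WRITE_WAITERS, &q);
		assert(q == MODEL_EXCLUSIVE_QUEUE && (x & M_RW_LOCK_READ_WAITERS));
		x = last_reader_release(M_RW_LOCK_READ_WAITERS, &q);
		assert(q == MODEL_SHARED_QUEUE && x == M_RW_UNLOCKED);
		return (0);
	}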
*/ void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; int spintries = 0; int i; #endif uintptr_t v, x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #ifdef KDTRACE_HOOKS uintptr_t state; uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (rw_wlocked(rw)) { KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE, ("%s: recursing but non-recursive rw %s @ %s:%d\n", __func__, rw->lock_object.lo_name, file, line)); rw->rw_recurse++; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw); return; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&rw->lock_object); state = rw->rw_lock; #endif while (!_rw_write_lock(rw, tid)) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. */ v = rw->rw_lock; owner = (struct thread *)RW_OWNER(v); if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } if ((v & RW_LOCK_READ) && RW_READERS(v) && spintries < rowner_retries) { if (!(v & RW_LOCK_WRITE_SPINNER)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_SPINNER)) { continue; } } spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); for (i = 0; i < rowner_loops; i++) { if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0) break; cpu_spinwait(); } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); #ifdef KDTRACE_HOOKS spin_cnt += rowner_loops - i; #endif if (i != rowner_loops) continue; } #endif ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (!(v & RW_LOCK_READ)) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * Check for the waiters flags about this rwlock. * If the lock was released, without maintain any pending * waiters queue, simply try to acquire it. * If a pending waiters queue is present, claim the lock * ownership and maintain the pending queue. 
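The claim step described in the comment just above is implemented by the code that immediately follows: the waiter and spinner bits are masked out, what remains is compared against the unlocked encoding, and a successful CAS installs the new owner together with the preserved waiter bits, minus the spinner bit (which only ever described the claiming spinner itself). A hypothetical userland model of that word arithmetic:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	#define	M_RW_LOCK_READ		0x01
	#define	M_RW_LOCK_READ_WAITERS	0x02
	#define	M_RW_LOCK_WRITE_WAITERS	0x04
	#define	M_RW_LOCK_WRITE_SPINNER	0x08
	#define	M_RW_LOCK_WAITERS	(M_RW_LOCK_READ_WAITERS | M_RW_LOCK_WRITE_WAITERS)
	#define	M_RW_UNLOCKED		M_RW_LOCK_READ

	/*
	 * The lock counts as available if the only thing set besides the
	 * unlocked encoding is waiter/spinner bits.  The successful CAS would
	 * install tid | x: the new owner plus the preserved waiter bits.
	 */
	static bool
	writer_can_claim(uintptr_t v, uintptr_t tid, uintptr_t *newword)
	{
		uintptr_t x;

		x = v & (M_RW_LOCK_WAITERS | M_RW_LOCK_WRITE_SPINNER);
		if ((v & ~x) != M_RW_UNLOCKED)
			return (false);
		x &= ~M_RW_LOCK_WRITE_SPINNER;
		*newword = tid | x;
		return (true);
	}

	int
	main(void)
	{
		uintptr_t nw, tid = 0x1000;	/* fake, suitably aligned, thread pointer */

		assert(writer_can_claim(M_RW_UNLOCKED | M_RW_LOCK_READ_WAITERS |
		    M_RW_LOCK_WRITE_SPINNER, tid, &nw));
		assert(nw == (tid | M_RW_LOCK_READ_WAITERS));
		/* A real reader count means the lock is still held. */
		assert(!writer_can_claim((2 << 4) | M_RW_LOCK_READ, tid, &nw));
		return (0);
	}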
*/ x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); if ((v & ~x) == RW_UNLOCKED) { x &= ~RW_LOCK_WRITE_SPINNER; if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) { if (x) turnstile_claim(ts); else turnstile_cancel(ts); break; } turnstile_cancel(ts); continue; } /* * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to * set it. If we fail to set it, then loop back and try * again. */ if (!(v & RW_LOCK_WRITE_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_WAITERS)) { turnstile_cancel(ts); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set write waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the write waiters * flag is set, so we must block on the turnstile. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); #ifdef ADAPTIVE_RWLOCKS spintries = 0; #endif } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); #endif - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(rw__acquire, rw, contested, - waittime, file, line); + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, + waittime, file, line, LOCKSTAT_WRITER); } /* * This function is called if the first try at releasing a write lock failed. * This means that one of the 2 waiter bits must be set indicating that at * least one thread is waiting on this lock. */ void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t v; int queue; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (rw_wlocked(rw) && rw_recursed(rw)) { rw->rw_recurse--; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw); return; } KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), ("%s: neither of the waiter flags are set", __func__)); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); turnstile_chain_lock(&rw->lock_object); ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); /* * Use the same algo as sx locks for now. Prefer waking up shared * waiters if we have any over writers. This is probably not ideal. * * 'v' is the value we are going to write back to rw_lock. If we * have waiters on both queues, we need to preserve the state of * the waiter flag for the queue we don't wake up. For now this is * hardcoded for the algorithm mentioned above. * * In the case of both readers and writers waiting we wakeup the * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a * new writer comes in before a reader it will claim the lock up * above. There is probably a potential priority inversion in * there that could be worked around either by waking both queues * of waiters or doing some complicated lock handoff gymnastics. 
*/ v = RW_UNLOCKED; if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; v |= (rw->rw_lock & RW_LOCK_READ_WAITERS); } else queue = TS_SHARED_QUEUE; /* Wake up all waiters for the specific queue. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, queue == TS_SHARED_QUEUE ? "read" : "write"); turnstile_broadcast(ts, queue); atomic_store_rel_ptr(&rw->rw_lock, v); turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); turnstile_chain_unlock(&rw->lock_object); } /* * Attempt to do a non-blocking upgrade from a read lock to a write * lock. This will only succeed if this thread holds a single read * lock. Returns true if the upgrade succeeded and false otherwise. */ int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t v, x, tid; struct turnstile *ts; int success; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_RLOCKED, file, line); /* * Attempt to switch from one reader to a writer. If there * are any write waiters, then we will have to lock the * turnstile first to prevent races with another writer * calling turnstile_wait() before we have claimed this * turnstile. So, do the simple case of no waiters first. */ tid = (uintptr_t)curthread; success = 0; for (;;) { v = rw->rw_lock; if (RW_READERS(v) > 1) break; if (!(v & RW_LOCK_WAITERS)) { success = atomic_cmpset_ptr(&rw->rw_lock, v, tid); if (!success) continue; break; } /* * Ok, we think we have waiters, so lock the turnstile. */ ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; if (RW_READERS(v) > 1) { turnstile_cancel(ts); break; } /* * Try to switch from one reader to a writer again. This time * we honor the current state of the waiters flags. * If we obtain the lock with the flags set, then claim * ownership of the turnstile. */ x = rw->rw_lock & RW_LOCK_WAITERS; success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x); if (success) { if (x) turnstile_claim(ts); else turnstile_cancel(ts); break; } turnstile_cancel(ts); } LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); if (success) { curthread->td_rw_rlocks--; WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(rw__upgrade, rw); } return (success); } /* * Downgrade a write lock into a single read lock. */ void __rw_downgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t tid, v; int rwait, wwait; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line); #ifndef INVARIANTS if (rw_recursed(rw)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); /* * Convert from a writer to a single reader. First we handle * the easy case with no waiters. If there are any waiters, we * lock the turnstile and "disown" the lock. */ tid = (uintptr_t)curthread; if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) goto out; /* * Ok, we think we have waiters, so lock the turnstile so we can * read the waiter flags without any races. 
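Returning to __rw_try_upgrade() earlier in this hunk: an upgrade is only attempted by a lone reader, and any waiter bits in the old word must survive into the writer-owned word so that blocked threads are not forgotten (in the kernel, succeeding with waiters present also means claiming the turnstile). A compact model of that decision, with invented names and a fake thread pointer:

	#include <assert.h>
	#include <stdbool.h>
	#include <stdint.h>

	#define	M_RW_LOCK_READ		0x01
	#define	M_RW_LOCK_READ_WAITERS	0x02
	#define	M_RW_LOCK_WRITE_WAITERS	0x04
	#define	M_RW_LOCK_WAITERS	(M_RW_LOCK_READ_WAITERS | M_RW_LOCK_WRITE_WAITERS)
	#define	M_RW_READERS_SHIFT	4
	#define	M_RW_READERS(v)		((((uintptr_t)(v)) & ~(uintptr_t)0xf) >> M_RW_READERS_SHIFT)

	static bool
	try_upgrade_word(uintptr_t v, uintptr_t tid, uintptr_t *newword)
	{

		/* Only a single reader may switch itself to a writer. */
		if (!(v & M_RW_LOCK_READ) || M_RW_READERS(v) != 1)
			return (false);
		*newword = tid | (v & M_RW_LOCK_WAITERS);
		return (true);
	}

	int
	main(void)
	{
		uintptr_t nw, tid = 0x2000;	/* fake thread pointer */

		/* One reader, a writer waiting: the waiter bit is kept. */
		assert(try_upgrade_word((1 << 4) | M_RW_LOCK_READ |
		    M_RW_LOCK_WRITE_WAITERS, tid, &nw));
		assert(nw == (tid | M_RW_LOCK_WRITE_WAITERS));
		/* Two readers: the upgrade must fail. */
		assert(!try_upgrade_word((2 << 4) | M_RW_LOCK_READ, tid, &nw));
		return (0);
	}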
*/ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & RW_LOCK_WAITERS; rwait = v & RW_LOCK_READ_WAITERS; wwait = v & RW_LOCK_WRITE_WAITERS; MPASS(rwait | wwait); /* * Downgrade from a write lock while preserving waiters flag * and give up ownership of the turnstile. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); if (!wwait) v &= ~RW_LOCK_READ_WAITERS; atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v); /* * Wake other readers if there are no writers pending. Otherwise they * won't be able to acquire the lock anyway. */ if (rwait && !wwait) { turnstile_broadcast(ts, TS_SHARED_QUEUE); turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); } else turnstile_disown(ts); turnstile_chain_unlock(&rw->lock_object); out: curthread->td_rw_rlocks++; LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(rw__downgrade, rw); } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef __rw_assert #endif /* * In the non-WITNESS case, rw_assert() can only detect that at least * *some* thread owns an rlock, but it cannot guarantee that *this* * thread owns an rlock. */ void __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct rwlock *rw; if (panicstr != NULL) return; rw = rwlock2rw(c); switch (what) { case RA_LOCKED: case RA_LOCKED | RA_RECURSED: case RA_LOCKED | RA_NOTRECURSED: case RA_RLOCKED: case RA_RLOCKED | RA_RECURSED: case RA_RLOCKED | RA_NOTRECURSED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If some other thread has a write lock or we have one * and are asserting a read lock, fail. Also, if no one * has a lock at all, fail. */ if (rw->rw_lock == RW_UNLOCKED || (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED || rw_wowner(rw) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", rw->lock_object.lo_name, (what & RA_RLOCKED) ? "read " : "", file, line); if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) { if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } #endif break; case RA_WLOCKED: case RA_WLOCKED | RA_RECURSED: case RA_WLOCKED | RA_NOTRECURSED: if (rw_wowner(rw) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); break; case RA_UNLOCKED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If we hold a write lock fail. We can't reliably check * to see if we hold a read lock or not. 
*/ if (rw_wowner(rw) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); #endif break; default: panic("Unknown rw lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB void db_show_rwlock(const struct lock_object *lock) { const struct rwlock *rw; struct thread *td; rw = (const struct rwlock *)lock; db_printf(" state: "); if (rw->rw_lock == RW_UNLOCKED) db_printf("UNLOCKED\n"); else if (rw->rw_lock == RW_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (rw->rw_lock & RW_LOCK_READ) db_printf("RLOCK: %ju locks\n", (uintmax_t)(RW_READERS(rw->rw_lock))); else { td = rw_wowner(rw); db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (rw_recursed(rw)) db_printf(" recursed: %u\n", rw->rw_recurse); } db_printf(" waiters: "); switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { case RW_LOCK_READ_WAITERS: db_printf("readers\n"); break; case RW_LOCK_WRITE_WAITERS: db_printf("writers\n"); break; case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: db_printf("readers and writers\n"); break; default: db_printf("none\n"); break; } } #endif Index: head/sys/kern/kern_sx.c =================================================================== --- head/sys/kern/kern_sx.c (revision 285703) +++ head/sys/kern/kern_sx.c (revision 285704) @@ -1,1255 +1,1255 @@ /*- * Copyright (c) 2007 Attilio Rao * Copyright (c) 2001 Jason Evans * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ /* * Shared/exclusive locks. This implementation attempts to ensure * deterministic lock granting behavior, so that slocks and xlocks are * interleaved. * * Priority propagation will not generally raise the priority of lock holders, * so should not be relied upon in combination with sx locks. 
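Before moving on to the sx implementation, db_show_rwlock() above doubles as a concise reference for how the whole rw_lock word decodes. The sketch below reproduces the main part of that decoding in userland; it is purely illustrative and omits the DESTROYED state and the waiter breakdown:

	#include <stdint.h>
	#include <stdio.h>

	#define	M_RW_LOCK_READ		0x01
	#define	M_RW_LOCK_FLAGMASK	0x0f
	#define	M_RW_READERS(v)		((((uintptr_t)(v)) & ~(uintptr_t)M_RW_LOCK_FLAGMASK) >> 4)
	#define	M_RW_UNLOCKED		M_RW_LOCK_READ

	static void
	decode_rw_word(uintptr_t v)
	{

		if (v == M_RW_UNLOCKED)
			printf("UNLOCKED\n");
		else if (v & M_RW_LOCK_READ)
			printf("RLOCK: %ju locks\n", (uintmax_t)M_RW_READERS(v));
		else
			printf("WLOCK: owner %p\n",
			    (void *)(v & ~(uintptr_t)M_RW_LOCK_FLAGMASK));
	}

	int
	main(void)
	{

		decode_rw_word(M_RW_UNLOCKED);			/* UNLOCKED */
		decode_rw_word((3 << 4) | M_RW_LOCK_READ);	/* RLOCK: 3 locks */
		decode_rw_word(0x1000);				/* WLOCK: owner 0x1000 */
		return (0);
	}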
*/ #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_no_adaptive_sx.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #include #endif #ifdef DDB #include #endif #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #define ADAPTIVE_SX #endif CTASSERT((SX_NOADAPTIVE & LO_CLASSFLAGS) == SX_NOADAPTIVE); #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* Handy macros for sleep queues. */ #define SQ_EXCLUSIVE_QUEUE 0 #define SQ_SHARED_QUEUE 1 /* * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file. We * drop Giant anytime we have to sleep or if we adaptively spin. */ #define GIANT_DECLARE \ int _giantcnt = 0; \ WITNESS_SAVE_DECL(Giant) \ #define GIANT_SAVE() do { \ if (mtx_owned(&Giant)) { \ WITNESS_SAVE(&Giant.lock_object, Giant); \ while (mtx_owned(&Giant)) { \ _giantcnt++; \ mtx_unlock(&Giant); \ } \ } \ } while (0) #define GIANT_RESTORE() do { \ if (_giantcnt > 0) { \ mtx_assert(&Giant, MA_NOTOWNED); \ while (_giantcnt--) \ mtx_lock(&Giant); \ WITNESS_RESTORE(&Giant.lock_object, Giant); \ } \ } while (0) /* * Returns true if an exclusive lock is recursed. It assumes * curthread currently has an exclusive lock. */ #define sx_recursed(sx) ((sx)->sx_recurse != 0) static void assert_sx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_sx(const struct lock_object *lock); #endif static void lock_sx(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_sx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_sx(struct lock_object *lock); struct lock_class lock_class_sx = { .lc_name = "sx", .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_sx, #ifdef DDB .lc_ddb_show = db_show_sx, #endif .lc_lock = lock_sx, .lc_unlock = unlock_sx, #ifdef KDTRACE_HOOKS .lc_owner = owner_sx, #endif }; #ifndef INVARIANTS #define _sx_assert(sx, what, file, line) #endif #ifdef ADAPTIVE_SX static u_int asx_retries = 10; static u_int asx_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging"); SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, ""); SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, ""); #endif void assert_sx(const struct lock_object *lock, int what) { sx_assert((const struct sx *)lock, what); } void lock_sx(struct lock_object *lock, uintptr_t how) { struct sx *sx; sx = (struct sx *)lock; if (how) sx_slock(sx); else sx_xlock(sx); } uintptr_t unlock_sx(struct lock_object *lock) { struct sx *sx; sx = (struct sx *)lock; sx_assert(sx, SA_LOCKED | SA_NOTRECURSED); if (sx_xlocked(sx)) { sx_xunlock(sx); return (0); } else { sx_sunlock(sx); return (1); } } #ifdef KDTRACE_HOOKS int owner_sx(const struct lock_object *lock, struct thread **owner) { const struct sx *sx = (const struct sx *)lock; uintptr_t x = sx->sx_lock; *owner = (struct thread *)SX_OWNER(x); return ((x & SX_LOCK_SHARED) != 0 ? 
(SX_SHARERS(x) != 0) : (*owner != NULL)); } #endif void sx_sysinit(void *arg) { struct sx_args *sargs = arg; sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags); } void sx_init_flags(struct sx *sx, const char *description, int opts) { int flags; MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK | SX_NOPROFILE | SX_NOADAPTIVE | SX_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock, ("%s: sx_lock not aligned for %s: %p", __func__, description, &sx->sx_lock)); flags = LO_SLEEPABLE | LO_UPGRADABLE; if (opts & SX_DUPOK) flags |= LO_DUPOK; if (opts & SX_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & SX_NOWITNESS)) flags |= LO_WITNESS; if (opts & SX_RECURSE) flags |= LO_RECURSABLE; if (opts & SX_QUIET) flags |= LO_QUIET; if (opts & SX_NEW) flags |= LO_NEW; flags |= opts & SX_NOADAPTIVE; lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags); sx->sx_lock = SX_LOCK_UNLOCKED; sx->sx_recurse = 0; } void sx_destroy(struct sx *sx) { KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held")); KASSERT(sx->sx_recurse == 0, ("sx lock still recursed")); sx->sx_lock = SX_LOCK_DESTROYED; lock_destroy(&sx->lock_object); } int _sx_slock(struct sx *sx, int opts, const char *file, int line) { int error = 0; if (SCHEDULER_STOPPED()) return (0); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_slock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL); error = __sx_slock(sx, opts, file, line); if (!error) { LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line); WITNESS_LOCK(&sx->lock_object, 0, file, line); curthread->td_locks++; } return (error); } int sx_try_slock_(struct sx *sx, const char *file, int line) { uintptr_t x; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_try_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); for (;;) { x = sx->sx_lock; KASSERT(x != SX_LOCK_DESTROYED, ("sx_try_slock() of destroyed sx @ %s:%d", file, line)); if (!(x & SX_LOCK_SHARED)) break; if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line); WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line); - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(sx__acquire, - sx, 0, 0, file, line); + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, + sx, 0, 0, file, line, LOCKSTAT_READER); curthread->td_locks++; return (1); } } LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line); return (0); } int _sx_xlock(struct sx *sx, int opts, const char *file, int line) { int error = 0; if (SCHEDULER_STOPPED()) return (0); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_xlock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xlock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); error = __sx_xlock(sx, curthread, opts, file, line); if (!error) { LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); curthread->td_locks++; } return (error); } int sx_try_xlock_(struct sx *sx, const char *file, int line) { int rval; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || 
!TD_IS_IDLETHREAD(curthread), ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_xlock() of destroyed sx @ %s:%d", file, line)); if (sx_xlocked(sx) && (sx->lock_object.lo_flags & LO_RECURSABLE) != 0) { sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); rval = 1; } else rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, (uintptr_t)curthread); LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); if (!sx_recursed(sx)) - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(sx__acquire, - sx, 0, 0, file, line); + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, + sx, 0, 0, file, line, LOCKSTAT_WRITER); curthread->td_locks++; } return (rval); } void _sx_sunlock(struct sx *sx, const char *file, int line) { if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_sunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, 0, file, line); LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line); __sx_sunlock(sx, file, line); curthread->td_locks--; } void _sx_xunlock(struct sx *sx, const char *file, int line) { if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); __sx_xunlock(sx, curthread, file, line); curthread->td_locks--; } /* * Try to do a non-blocking upgrade from a shared lock to an exclusive lock. * This will only succeed if this thread holds a single shared lock. * Return 1 if if the upgrade succeed, 0 otherwise. */ int sx_try_upgrade_(struct sx *sx, const char *file, int line) { uintptr_t x; int success; if (SCHEDULER_STOPPED()) return (1); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); /* * Try to switch from one shared lock to an exclusive lock. We need * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that * we will wake up the exclusive waiters when we drop the lock. */ x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS; success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x, (uintptr_t)curthread | x); LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line); if (success) { WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(sx__upgrade, sx); } return (success); } /* * Downgrade an unrecursed exclusive lock into a single shared lock. */ void sx_downgrade_(struct sx *sx, const char *file, int line) { uintptr_t x; int wakeup_swapper; if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_downgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line); #ifndef INVARIANTS if (sx_recursed(sx)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line); /* * Try to switch from an exclusive lock with no shared waiters * to one sharer with no shared waiters. If there are * exclusive waiters, we don't need to lock the sleep queue so * long as we preserve the flag. We do one quick try and if * that fails we grab the sleepq lock to keep the flags from * changing and do it the slow way. 
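Returning to the recursive branch of sx_try_xlock() earlier in this hunk: an sx lock tracks recursion with a separate sx_recurse counter plus the SX_LOCK_RECURSED bit in the lock word (rwlocks, by contrast, reuse lo_data via the rw_recurse macro). A tiny model of that bookkeeping; the flag value is a placeholder rather than the one in sys/sx.h, and the unrecursion half corresponds to _sx_xunlock_hard() further down:

	#include <assert.h>
	#include <stdint.h>

	#define	M_SX_LOCK_RECURSED	0x02	/* illustrative bit value only */

	struct model_sx {
		uintptr_t	sx_lock;	/* owner | flag bits */
		int		sx_recurse;
	};

	/* Recursive re-acquire by the current owner (sx_try_xlock recursive case). */
	static void
	model_sx_recurse(struct model_sx *sx)
	{

		sx->sx_recurse++;
		sx->sx_lock |= M_SX_LOCK_RECURSED;
	}

	/* Matching unrecursion step (cf. _sx_xunlock_hard). */
	static void
	model_sx_unrecurse(struct model_sx *sx)
	{

		if (--sx->sx_recurse == 0)
			sx->sx_lock &= ~M_SX_LOCK_RECURSED;
	}

	int
	main(void)
	{
		struct model_sx sx = { .sx_lock = 0x3000 /* fake owner */, .sx_recurse = 0 };

		model_sx_recurse(&sx);
		model_sx_recurse(&sx);
		assert(sx.sx_recurse == 2 && (sx.sx_lock & M_SX_LOCK_RECURSED));
		model_sx_unrecurse(&sx);
		assert(sx.sx_lock & M_SX_LOCK_RECURSED);	/* still recursed once */
		model_sx_unrecurse(&sx);
		assert(!(sx.sx_lock & M_SX_LOCK_RECURSED));
		return (0);
	}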
* * We have to lock the sleep queue if there are shared waiters * so we can wake them up. */ x = sx->sx_lock; if (!(x & SX_LOCK_SHARED_WAITERS) && atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS))) { LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line); return; } /* * Lock the sleep queue so we can read the waiters bits * without any races and wakeup any shared waiters. */ sleepq_lock(&sx->lock_object); /* * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single * shared lock. If there are any shared waiters, wake them up. */ wakeup_swapper = 0; x = sx->sx_lock; atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS)); if (x & SX_LOCK_SHARED_WAITERS) wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, SQ_SHARED_QUEUE); sleepq_release(&sx->lock_object); LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(sx__downgrade, sx); if (wakeup_swapper) kick_proc0(); } /* * This function represents the so-called 'hard case' for sx_xlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ int _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, int line) { GIANT_DECLARE; #ifdef ADAPTIVE_SX volatile struct thread *owner; u_int i, spintries = 0; #endif uintptr_t x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int error = 0; #ifdef KDTRACE_HOOKS uintptr_t state; uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return (0); /* If we already hold an exclusive lock, then recurse. */ if (sx_xlocked(sx)) { KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0, ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n", sx->lock_object.lo_name, file, line)); sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx); return (0); } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, sx->lock_object.lo_name, (void *)sx->sx_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&sx->lock_object); state = sx->sx_lock; #endif while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime); #ifdef ADAPTIVE_SX /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. 
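Adaptive spinning, used by both the rw and sx slow paths, is bounded in two ways: it continues only while the owner is observed running on another CPU, and the shared-owner case additionally caps the number of attempts (asx_retries) and iterations per attempt (asx_loops). A toy userland rendition of that shape, with an atomic flag standing in for TD_IS_RUNNING() and invented model_* names:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static unsigned model_retries = 10;	/* cf. asx_retries */
	static unsigned model_loops = 10000;	/* cf. asx_loops */

	/*
	 * Keep spinning while the lock owner is still on CPU, up to the
	 * configured bound; once the owner goes off-CPU or the bound is
	 * exhausted, the caller falls back to blocking.
	 */
	static bool
	model_adaptive_spin(_Atomic bool *owner_running)
	{
		unsigned attempt, i;

		for (attempt = 0; attempt < model_retries; attempt++) {
			for (i = 0; i < model_loops; i++) {
				if (!atomic_load_explicit(owner_running,
				    memory_order_relaxed))
					return (true);	/* owner blocked: stop spinning */
				/* cpu_spinwait() would sit here in the kernel. */
			}
		}
		return (false);			/* bound exhausted: sleep instead */
	}

	int
	main(void)
	{
		_Atomic bool running = false;

		printf("stopped spinning early: %s\n",
		    model_adaptive_spin(&running) ? "yes" : "no");
		return (0);
	}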
*/ x = sx->sx_lock; if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { if ((x & SX_LOCK_SHARED) == 0) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } else if (SX_SHARERS(x) && spintries < asx_retries) { KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); spintries++; for (i = 0; i < asx_loops; i++) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: shared spinning on %p with %u and %u", __func__, sx, spintries, i); x = sx->sx_lock; if ((x & SX_LOCK_SHARED) == 0 || SX_SHARERS(x) == 0) break; cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i != asx_loops) continue; } } #endif sleepq_lock(&sx->lock_object); x = sx->sx_lock; /* * If the lock was released while spinning on the * sleep queue chain lock, try again. */ if (x == SX_LOCK_UNLOCKED) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the sleep queue * chain lock. If so, drop the sleep queue lock and try * again. */ if (!(x & SX_LOCK_SHARED) && (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); continue; } } #endif /* * If an exclusive lock was released with both shared * and exclusive waiters and a shared waiter hasn't * woken up and acquired the lock yet, sx_lock will be * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS. * If we see that value, try to acquire it once. Note * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS * as there are other exclusive waiters still. If we * fail, restart the loop. */ if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) { if (atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS, tid | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); CTR2(KTR_LOCK, "%s: %p claimed by new writer", __func__, sx); break; } sleepq_release(&sx->lock_object); continue; } /* * Try to set the SX_LOCK_EXCLUSIVE_WAITERS. If we fail, * than loop back and retry. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { if (!atomic_cmpset_ptr(&sx->sx_lock, x, x | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set excl waiters flag", __func__, sx); } /* * Since we have been unable to acquire the exclusive * lock and the exclusive waiters flag is set, we have * to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&sx->lock_object); #endif GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? 
SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&sx->lock_object); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&sx->lock_object); if (sleep_time) LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); #endif if (!error) - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(sx__acquire, sx, - contested, waittime, file, line); + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, + contested, waittime, file, line, LOCKSTAT_WRITER); GIANT_RESTORE(); return (error); } /* * This function represents the so-called 'hard case' for sx_xunlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ void _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line) { uintptr_t x; int queue, wakeup_swapper; if (SCHEDULER_STOPPED()) return; MPASS(!(sx->sx_lock & SX_LOCK_SHARED)); /* If the lock is recursed, then unrecurse one level. */ if (sx_xlocked(sx) && sx_recursed(sx)) { if ((--sx->sx_recurse) == 0) atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx); return; } MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, sx); sleepq_lock(&sx->lock_object); x = SX_LOCK_UNLOCKED; /* * The wake up algorithm here is quite simple and probably not * ideal. It gives precedence to shared waiters if they are * present. For this condition, we have to preserve the * state of the exclusive waiters flag. * If interruptible sleeps left the shared queue empty avoid a * starvation for the threads sleeping on the exclusive queue by giving * them precedence and cleaning up the shared waiters bit anyway. */ if ((sx->sx_lock & SX_LOCK_SHARED_WAITERS) != 0 && sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) { queue = SQ_SHARED_QUEUE; x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS); } else queue = SQ_EXCLUSIVE_QUEUE; /* Wake up all the waiters for the specific queue. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue", __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); atomic_store_rel_ptr(&sx->sx_lock, x); wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, queue); sleepq_release(&sx->lock_object); if (wakeup_swapper) kick_proc0(); } /* * This function represents the so-called 'hard case' for sx_slock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. 
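Looking back at _sx_xunlock_hard() in this hunk: the release prefers the shared queue, but only when somebody is actually sleeping on it (an interruptible sleep can leave the shared-waiters bit set over an empty queue), and it preserves the exclusive-waiters bit whenever it wakes readers. A model of that selection; the SX flag values are placeholders and a plain count stands in for sleepq_sleepcnt():

	#include <assert.h>
	#include <stdint.h>

	/* Placeholder flag values; the real ones live in sys/sx.h. */
	#define	M_SX_LOCK_UNLOCKED		0x00
	#define	M_SX_LOCK_SHARED_WAITERS	0x01
	#define	M_SX_LOCK_EXCLUSIVE_WAITERS	0x02

	enum model_queue { MODEL_SQ_EXCLUSIVE, MODEL_SQ_SHARED };

	static uintptr_t
	xunlock_pick_queue(uintptr_t lockword, unsigned shared_sleepers,
	    enum model_queue *queuep)
	{
		uintptr_t x;

		x = M_SX_LOCK_UNLOCKED;
		if ((lockword & M_SX_LOCK_SHARED_WAITERS) != 0 &&
		    shared_sleepers != 0) {
			*queuep = MODEL_SQ_SHARED;
			/* Writers keep their claim for the next release. */
			x |= (lockword & M_SX_LOCK_EXCLUSIVE_WAITERS);
		} else
			*queuep = MODEL_SQ_EXCLUSIVE;
		return (x);
	}

	int
	main(void)
	{
		enum model_queue q;
		uintptr_t x;

		x = xunlock_pick_queue(M_SX_LOCK_SHARED_WAITERS |
		    M_SX_LOCK_EXCLUSIVE_WAITERS, 3, &q);
		assert(q == MODEL_SQ_SHARED && (x & M_SX_LOCK_EXCLUSIVE_WAITERS));
		/* Shared bit set but nobody actually sleeping: wake the writers. */
		x = xunlock_pick_queue(M_SX_LOCK_SHARED_WAITERS, 0, &q);
		assert(q == MODEL_SQ_EXCLUSIVE && x == M_SX_LOCK_UNLOCKED);
		return (0);
	}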
*/ int _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) { GIANT_DECLARE; #ifdef ADAPTIVE_SX volatile struct thread *owner; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif uintptr_t x; int error = 0; #ifdef KDTRACE_HOOKS uintptr_t state; uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return (0); #ifdef KDTRACE_HOOKS state = sx->sx_lock; all_time -= lockstat_nsecs(&sx->lock_object); #endif /* * As with rwlocks, we don't make any attempt to try to block * shared locks once there is an exclusive waiter. */ for (;;) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif x = sx->sx_lock; /* * If no other thread has an exclusive lock then try to bump up * the count of sharers. Since we have to preserve the state * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the * shared lock loop back and retry. */ if (x & SX_LOCK_SHARED) { MPASS(!(x & SX_LOCK_SHARED_WAITERS)); if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, sx, (void *)x, (void *)(x + SX_ONE_SHARER)); break; } continue; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime); #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif cpu_spinwait(); } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } #endif /* * Some other thread already has an exclusive lock, so * start the process of blocking. */ sleepq_lock(&sx->lock_object); x = sx->sx_lock; /* * The lock could have been released while we spun. * In this case loop back and retry. */ if (x & SX_LOCK_SHARED) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if (!(x & SX_LOCK_SHARED) && (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); continue; } } #endif /* * Try to set the SX_LOCK_SHARED_WAITERS flag. If we * fail to set it drop the sleep queue lock and loop * back. */ if (!(x & SX_LOCK_SHARED_WAITERS)) { if (!atomic_cmpset_ptr(&sx->sx_lock, x, x | SX_LOCK_SHARED_WAITERS)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set shared waiters flag", __func__, sx); } /* * Since we have been unable to acquire the shared lock, * we have to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&sx->lock_object); #endif GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? 
SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&sx->lock_object); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&sx->lock_object); if (sleep_time) LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); #endif if (error == 0) - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(sx__acquire, sx, contested, - waittime, file, line); + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, + contested, waittime, file, line, LOCKSTAT_READER); GIANT_RESTORE(); return (error); } /* * This function represents the so-called 'hard case' for sx_sunlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ void _sx_sunlock_hard(struct sx *sx, const char *file, int line) { uintptr_t x; int wakeup_swapper; if (SCHEDULER_STOPPED()) return; for (;;) { x = sx->sx_lock; /* * We should never have sharers while at least one thread * holds a shared lock. */ KASSERT(!(x & SX_LOCK_SHARED_WAITERS), ("%s: waiting sharers", __func__)); /* * See if there is more than one shared lock held. If * so, just drop one and return. */ if (SX_SHARERS(x) > 1) { if (atomic_cmpset_rel_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, sx, (void *)x, (void *)(x - SX_ONE_SHARER)); break; } continue; } /* * If there aren't any waiters for an exclusive lock, * then try to drop it quickly. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { MPASS(x == SX_SHARERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1), SX_LOCK_UNLOCKED)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded", __func__, sx); break; } continue; } /* * At this point, there should just be one sharer with * exclusive waiters. */ MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS)); sleepq_lock(&sx->lock_object); /* * Wake up semantic here is quite simple: * Just wake up all the exclusive waiters. * Note that the state of the lock could have changed, * so if it fails loop back and retry. */ if (!atomic_cmpset_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS, SX_LOCK_UNLOCKED)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p waking up all thread on" "exclusive queue", __func__, sx); wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, SQ_EXCLUSIVE_QUEUE); sleepq_release(&sx->lock_object); if (wakeup_swapper) kick_proc0(); break; } } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef _sx_assert #endif /* * In the non-WITNESS case, sx_assert() can only detect that at least * *some* thread owns an slock, but it cannot guarantee that *this* * thread owns an slock. 
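Across the rw and sx slow paths, the rw__block/rw__spin and sx__block/sx__spin probes are handed the same five-argument shape: the lock, the elapsed nanoseconds, whether the caller wanted the lock as reader or writer, whether the lock was exclusively held when the attempt began, and, if it was not, how many sharers it had at that point. The small illustration below packs that tuple from a saved state word; the struct, helper, and flag encodings are invented for the example:

	#include <assert.h>
	#include <stdint.h>

	#define	M_LOCKSTAT_WRITER	0
	#define	M_LOCKSTAT_READER	1

	#define	M_SX_LOCK_SHARED	0x01		/* placeholder bit */
	#define	M_SX_SHARERS(v)		((v) >> 4)	/* placeholder encoding */

	/* The argument tuple handed to the block/spin probes. */
	struct model_block_record {
		int64_t		elapsed_nsecs;
		int		as_reader;	/* LOCKSTAT_READER / LOCKSTAT_WRITER */
		int		was_exclusive;	/* lock was write-locked at entry */
		uintptr_t	nsharers;	/* sharers at entry, else 0 */
	};

	static struct model_block_record
	make_block_record(uintptr_t state, int as_reader, int64_t nsecs)
	{
		struct model_block_record r;

		r.elapsed_nsecs = nsecs;
		r.as_reader = as_reader;
		r.was_exclusive = (state & M_SX_LOCK_SHARED) == 0;
		r.nsharers = r.was_exclusive ? 0 : M_SX_SHARERS(state);
		return (r);
	}

	int
	main(void)
	{
		/* Entry snapshot: shared-locked with two sharers. */
		struct model_block_record r =
		    make_block_record((2 << 4) | M_SX_LOCK_SHARED,
		    M_LOCKSTAT_READER, 12345);

		assert(!r.was_exclusive && r.nsharers == 2);
		assert(r.as_reader == M_LOCKSTAT_READER);
		return (0);
	}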
*/ void _sx_assert(const struct sx *sx, int what, const char *file, int line) { #ifndef WITNESS int slocked = 0; #endif if (panicstr != NULL) return; switch (what) { case SA_SLOCKED: case SA_SLOCKED | SA_NOTRECURSED: case SA_SLOCKED | SA_RECURSED: #ifndef WITNESS slocked = 1; /* FALLTHROUGH */ #endif case SA_LOCKED: case SA_LOCKED | SA_NOTRECURSED: case SA_LOCKED | SA_RECURSED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If some other thread has an exclusive lock or we * have one and are asserting a shared lock, fail. * Also, if no one has a lock at all, fail. */ if (sx->sx_lock == SX_LOCK_UNLOCKED || (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked || sx_xholder(sx) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", sx->lock_object.lo_name, slocked ? "share " : "", file, line); if (!(sx->sx_lock & SX_LOCK_SHARED)) { if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } #endif break; case SA_XLOCKED: case SA_XLOCKED | SA_NOTRECURSED: case SA_XLOCKED | SA_RECURSED: if (sx_xholder(sx) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); break; case SA_UNLOCKED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If we hold an exclusve lock fail. We can't * reliably check to see if we hold a shared lock or * not. */ if (sx_xholder(sx) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); #endif break; default: panic("Unknown sx lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB static void db_show_sx(const struct lock_object *lock) { struct thread *td; const struct sx *sx; sx = (const struct sx *)lock; db_printf(" state: "); if (sx->sx_lock == SX_LOCK_UNLOCKED) db_printf("UNLOCKED\n"); else if (sx->sx_lock == SX_LOCK_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else { td = sx_xholder(sx); db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (sx_recursed(sx)) db_printf(" recursed: %d\n", sx->sx_recurse); } db_printf(" waiters: "); switch(sx->sx_lock & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) { case SX_LOCK_SHARED_WAITERS: db_printf("shared\n"); break; case SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive\n"); break; case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive and shared\n"); break; default: db_printf("none\n"); } } /* * Check to see if a thread that is blocked on a sleep queue is actually * blocked on an sx lock. If so, output some details and return true. * If the lock has an exclusive owner, return that in *ownerp. */ int sx_chain(struct thread *td, struct thread **ownerp) { struct sx *sx; /* * Check to see if this thread is blocked on an sx lock. * First, we check the lock class. If that is ok, then we * compare the lock name against the wait message. 
*/ sx = td->td_wchan; if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx || sx->lock_object.lo_name != td->td_wmesg) return (0); /* We think we have an sx lock, so output some details. */ db_printf("blocked on sx \"%s\" ", td->td_wmesg); *ownerp = sx_xholder(sx); if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK (count %ju)\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else db_printf("XLOCK\n"); return (1); } #endif Index: head/sys/sys/lockstat.h =================================================================== --- head/sys/sys/lockstat.h (revision 285703) +++ head/sys/sys/lockstat.h (revision 285704) @@ -1,119 +1,135 @@ /*- * Copyright (c) 2008-2009 Stacey Son * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ /* * DTrace lockstat provider definitions */ #ifndef _SYS_LOCKSTAT_H #define _SYS_LOCKSTAT_H #ifdef _KERNEL #include #include #include SDT_PROVIDER_DECLARE(lockstat); SDT_PROBE_DECLARE(lockstat, , , adaptive__acquire); SDT_PROBE_DECLARE(lockstat, , , adaptive__release); SDT_PROBE_DECLARE(lockstat, , , adaptive__spin); SDT_PROBE_DECLARE(lockstat, , , adaptive__block); SDT_PROBE_DECLARE(lockstat, , , spin__acquire); SDT_PROBE_DECLARE(lockstat, , , spin__release); SDT_PROBE_DECLARE(lockstat, , , spin__spin); SDT_PROBE_DECLARE(lockstat, , , rw__acquire); SDT_PROBE_DECLARE(lockstat, , , rw__release); SDT_PROBE_DECLARE(lockstat, , , rw__block); SDT_PROBE_DECLARE(lockstat, , , rw__spin); SDT_PROBE_DECLARE(lockstat, , , rw__upgrade); SDT_PROBE_DECLARE(lockstat, , , rw__downgrade); SDT_PROBE_DECLARE(lockstat, , , sx__acquire); SDT_PROBE_DECLARE(lockstat, , , sx__release); SDT_PROBE_DECLARE(lockstat, , , sx__block); SDT_PROBE_DECLARE(lockstat, , , sx__spin); SDT_PROBE_DECLARE(lockstat, , , sx__upgrade); SDT_PROBE_DECLARE(lockstat, , , sx__downgrade); SDT_PROBE_DECLARE(lockstat, , , thread__spin); #define LOCKSTAT_WRITER 0 #define LOCKSTAT_READER 1 #ifdef KDTRACE_HOOKS #define LOCKSTAT_RECORD0(probe, lp) \ SDT_PROBE1(lockstat, , , probe, lp) #define LOCKSTAT_RECORD1(probe, lp, arg1) \ SDT_PROBE2(lockstat, , , probe, lp, arg1) #define LOCKSTAT_RECORD2(probe, lp, arg1, arg2) \ SDT_PROBE3(lockstat, , , probe, lp, arg1, arg2) #define LOCKSTAT_RECORD3(probe, lp, arg1, arg2, arg3) \ SDT_PROBE4(lockstat, , , probe, lp, arg1, arg2, arg3) #define LOCKSTAT_RECORD4(probe, lp, arg1, arg2, arg3, arg4) \ SDT_PROBE5(lockstat, , , probe, lp, arg1, arg2, arg3, arg4) #define LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(probe, lp, c, wt, f, l) do { \ lock_profile_obtain_lock_success(&(lp)->lock_object, c, wt, f, l); \ LOCKSTAT_RECORD0(probe, lp); \ } while (0) +#define LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(probe, lp, c, wt, f, l, a) do { \ + lock_profile_obtain_lock_success(&(lp)->lock_object, c, wt, f, l); \ + LOCKSTAT_RECORD1(probe, lp, a); \ +} while (0) + #define LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp) do { \ lock_profile_release_lock(&(lp)->lock_object); \ LOCKSTAT_RECORD0(probe, lp); \ } while (0) +#define LOCKSTAT_PROFILE_RELEASE_RWLOCK(probe, lp, a) do { \ + lock_profile_release_lock(&(lp)->lock_object); \ + LOCKSTAT_RECORD1(probe, lp, a); \ +} while (0) + extern int lockstat_enabled; struct lock_object; -extern uint64_t lockstat_nsecs(struct lock_object *); +uint64_t lockstat_nsecs(struct lock_object *); #else /* !KDTRACE_HOOKS */ #define LOCKSTAT_RECORD0(probe, lp) #define LOCKSTAT_RECORD1(probe, lp, arg1) #define LOCKSTAT_RECORD2(probe, lp, arg1, arg2) #define LOCKSTAT_RECORD3(probe, lp, arg1, arg2, arg3) #define LOCKSTAT_RECORD4(probe, lp, arg1, arg2, arg3, arg4) #define LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(probe, lp, c, wt, f, l) \ lock_profile_obtain_lock_success(&(lp)->lock_object, c, wt, f, l) +#define LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(probe, lp, c, wt, f, l, a) \ + LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(probe, lp, c, wt, f, l) + #define LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp) \ lock_profile_release_lock(&(lp)->lock_object) + +#define LOCKSTAT_PROFILE_RELEASE_RWLOCK(probe, lp, a) \ + LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp) #endif /* !KDTRACE_HOOKS */ #endif /* _KERNEL */ #endif /* _SYS_LOCKSTAT_H */ Index: head/sys/sys/rwlock.h =================================================================== --- head/sys/sys/rwlock.h (revision 285703) +++ head/sys/sys/rwlock.h (revision 285704) @@ 
-1,285 +1,286 @@ /*- * Copyright (c) 2006 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_RWLOCK_H_ #define _SYS_RWLOCK_H_ #include #include #include #include #ifdef _KERNEL #include #include #endif /* * The rw_lock field consists of several fields. The low bit indicates * if the lock is locked with a read (shared) or write (exclusive) lock. * A value of 0 indicates a write lock, and a value of 1 indicates a read * lock. Bit 1 is a boolean indicating if there are any threads waiting * for a read lock. Bit 2 is a boolean indicating if there are any threads * waiting for a write lock. The rest of the variable's definition is * dependent on the value of the first bit. For a write lock, it is a * pointer to the thread holding the lock, similar to the mtx_lock field of * mutexes. For read locks, it is a count of read locks that are held. * * When the lock is not locked by any thread, it is encoded as a read lock * with zero waiters. */ #define RW_LOCK_READ 0x01 #define RW_LOCK_READ_WAITERS 0x02 #define RW_LOCK_WRITE_WAITERS 0x04 #define RW_LOCK_WRITE_SPINNER 0x08 #define RW_LOCK_FLAGMASK \ (RW_LOCK_READ | RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS | \ RW_LOCK_WRITE_SPINNER) #define RW_LOCK_WAITERS (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS) #define RW_OWNER(x) ((x) & ~RW_LOCK_FLAGMASK) #define RW_READERS_SHIFT 4 #define RW_READERS(x) (RW_OWNER((x)) >> RW_READERS_SHIFT) #define RW_READERS_LOCK(x) ((x) << RW_READERS_SHIFT | RW_LOCK_READ) #define RW_ONE_READER (1 << RW_READERS_SHIFT) #define RW_UNLOCKED RW_READERS_LOCK(0) #define RW_DESTROYED (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS) #ifdef _KERNEL #define rw_recurse lock_object.lo_data /* Very simple operations on rw_lock. */ /* Try to obtain a write lock once. */ #define _rw_write_lock(rw, tid) \ atomic_cmpset_acq_ptr(&(rw)->rw_lock, RW_UNLOCKED, (tid)) /* Release a write lock quickly if there are no waiters. */ #define _rw_write_unlock(rw, tid) \ atomic_cmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED) /* * Full lock operations that are suitable to be inlined in non-debug * kernels. If the lock cannot be acquired or released trivially then * the work is deferred to another function. */ /* Acquire a write lock. 
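The rw_lock encoding described above can be exercised in isolation. The sketch below is not part of this revision; it is a user-space check that copies the RW_* macros from this header (the local copies exist only so the example compiles on its own) and asserts the properties the comment claims: an unlocked word is a read lock with zero readers, the reader count sits above the flag bits, and a write-locked word is simply the aligned owning thread pointer.

/*
 * Not part of this change: user-space check of the rw_lock word encoding
 * described above.  The RW_* macros are copied from sys/sys/rwlock.h so
 * the example compiles on its own; in the kernel the header provides them.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define RW_LOCK_READ            0x01
#define RW_LOCK_READ_WAITERS    0x02
#define RW_LOCK_WRITE_WAITERS   0x04
#define RW_LOCK_WRITE_SPINNER   0x08
#define RW_LOCK_FLAGMASK \
        (RW_LOCK_READ | RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS | \
        RW_LOCK_WRITE_SPINNER)
#define RW_OWNER(x)             ((x) & ~RW_LOCK_FLAGMASK)
#define RW_READERS_SHIFT        4
#define RW_READERS(x)           (RW_OWNER((x)) >> RW_READERS_SHIFT)
#define RW_READERS_LOCK(x)      ((x) << RW_READERS_SHIFT | RW_LOCK_READ)
#define RW_ONE_READER           (1 << RW_READERS_SHIFT)
#define RW_UNLOCKED             RW_READERS_LOCK(0)

int
main(void)
{
        uintptr_t w, fake_td;

        /* Unlocked is encoded as a read lock held by zero readers. */
        assert((RW_UNLOCKED & RW_LOCK_READ) != 0);
        assert(RW_READERS(RW_UNLOCKED) == 0);

        /* Each reader adds RW_ONE_READER above the flag bits. */
        w = RW_UNLOCKED + 2 * RW_ONE_READER;
        assert(RW_READERS(w) == 2);
        assert((w & RW_LOCK_FLAGMASK) == RW_LOCK_READ);

        /* A write-locked word is the (aligned) owning thread pointer. */
        fake_td = (uintptr_t)0xdeadbee0;        /* stands in for curthread */
        assert(RW_OWNER(fake_td) == fake_td);
        assert((fake_td & RW_LOCK_READ) == 0);

        printf("rw_lock encoding checks passed\n");
        return (0);
}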
*/ #define __rw_wlock(rw, tid, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ \ if (!_rw_write_lock((rw), _tid)) \ _rw_wlock_hard((rw), _tid, (file), (line)); \ else \ - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(rw__acquire, rw, \ - 0, 0, file, line); \ + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, \ + 0, 0, file, line, LOCKSTAT_WRITER); \ } while (0) /* Release a write lock. */ #define __rw_wunlock(rw, tid, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ \ if ((rw)->rw_recurse) \ (rw)->rw_recurse--; \ else { \ - LOCKSTAT_PROFILE_RELEASE_LOCK(rw__release, rw); \ + LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, \ + LOCKSTAT_WRITER); \ if (!_rw_write_unlock((rw), _tid)) \ _rw_wunlock_hard((rw), _tid, (file), (line)); \ } \ } while (0) /* * Function prototypes. Routines that start with _ are not part of the * external API and should not be called directly. Wrapper macros should * be used instead. */ void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts); void _rw_destroy(volatile uintptr_t *c); void rw_sysinit(void *arg); void rw_sysinit_flags(void *arg); int _rw_wowned(const volatile uintptr_t *c); void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line); int __rw_try_wlock(volatile uintptr_t *c, const char *file, int line); void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line); void __rw_rlock(volatile uintptr_t *c, const char *file, int line); int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line); void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line); void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line); void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line); int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line); void __rw_downgrade(volatile uintptr_t *c, const char *file, int line); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line); #endif /* * Top-level macros to provide lock cookie once the actual rwlock is passed. * They will also prevent passing a malformed object to the rwlock KPI by * failing compilation as the rw_lock reserved member will not be found. */ #define rw_init(rw, n) \ _rw_init_flags(&(rw)->rw_lock, n, 0) #define rw_init_flags(rw, n, o) \ _rw_init_flags(&(rw)->rw_lock, n, o) #define rw_destroy(rw) \ _rw_destroy(&(rw)->rw_lock) #define rw_wowned(rw) \ _rw_wowned(&(rw)->rw_lock) #define _rw_wlock(rw, f, l) \ _rw_wlock_cookie(&(rw)->rw_lock, f, l) #define _rw_try_wlock(rw, f, l) \ __rw_try_wlock(&(rw)->rw_lock, f, l) #define _rw_wunlock(rw, f, l) \ _rw_wunlock_cookie(&(rw)->rw_lock, f, l) #define _rw_rlock(rw, f, l) \ __rw_rlock(&(rw)->rw_lock, f, l) #define _rw_try_rlock(rw, f, l) \ __rw_try_rlock(&(rw)->rw_lock, f, l) #define _rw_runlock(rw, f, l) \ _rw_runlock_cookie(&(rw)->rw_lock, f, l) #define _rw_wlock_hard(rw, t, f, l) \ __rw_wlock_hard(&(rw)->rw_lock, t, f, l) #define _rw_wunlock_hard(rw, t, f, l) \ __rw_wunlock_hard(&(rw)->rw_lock, t, f, l) #define _rw_try_upgrade(rw, f, l) \ __rw_try_upgrade(&(rw)->rw_lock, f, l) #define _rw_downgrade(rw, f, l) \ __rw_downgrade(&(rw)->rw_lock, f, l) #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) #define _rw_assert(rw, w, f, l) \ __rw_assert(&(rw)->rw_lock, w, f, l) #endif /* * Public interface for lock operations. 
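To see what the new LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS()/LOCKSTAT_PROFILE_RELEASE_RWLOCK() macros add over the plain variants used in __rw_wlock()/__rw_wunlock() before this change, the following user-space mock (not part of this revision) stubs lock_profile_*() and SDT_PROBEn() with printf and reuses the macro bodies from sys/sys/lockstat.h as of this revision. The extra argument, LOCKSTAT_WRITER or LOCKSTAT_READER, is forwarded through LOCKSTAT_RECORD1() and arrives as the probe's second argument; the pre-existing non-RWLOCK macros go through LOCKSTAT_RECORD0() and carry no such flag. struct fake_rwlock and the printf stubs are stand-ins, not kernel code.

/*
 * Not part of this change: user-space mock showing what the new *_RWLOCK_*
 * lockstat macros expand to under KDTRACE_HOOKS.  The SDT_PROBE2() and
 * lock_profile_*() stubs are printf stand-ins so the example runs outside
 * the kernel; the LOCKSTAT_* macro bodies mirror sys/sys/lockstat.h as of
 * this revision.
 */
#include <stdio.h>

struct lock_object { const char *lo_name; };
struct fake_rwlock { struct lock_object lock_object; };    /* stand-in */

#define LOCKSTAT_WRITER 0
#define LOCKSTAT_READER 1

/* Stubs for the kernel facilities the macros call into. */
#define lock_profile_obtain_lock_success(lo, c, wt, f, l) \
        printf("profile: acquire %s\n", (lo)->lo_name)
#define lock_profile_release_lock(lo) \
        printf("profile: release %s\n", (lo)->lo_name)
#define SDT_PROBE2(prov, mod, func, name, a0, a1) \
        printf("probe %s: lock=%p arg1=%d\n", #name, (void *)(a0), (int)(a1))

/* Macro bodies as in the KDTRACE_HOOKS branch of sys/sys/lockstat.h. */
#define LOCKSTAT_RECORD1(probe, lp, arg1) \
        SDT_PROBE2(lockstat, , , probe, lp, arg1)

#define LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(probe, lp, c, wt, f, l, a) do { \
        lock_profile_obtain_lock_success(&(lp)->lock_object, c, wt, f, l); \
        LOCKSTAT_RECORD1(probe, lp, a); \
} while (0)

#define LOCKSTAT_PROFILE_RELEASE_RWLOCK(probe, lp, a) do { \
        lock_profile_release_lock(&(lp)->lock_object); \
        LOCKSTAT_RECORD1(probe, lp, a); \
} while (0)

int
main(void)
{
        struct fake_rwlock rw = { { "example_rw" } };

        /* Write fast path, as in __rw_wlock()/__rw_wunlock() above. */
        LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, &rw,
            0, 0, __FILE__, __LINE__, LOCKSTAT_WRITER);
        LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, &rw, LOCKSTAT_WRITER);

        /* A read path passes LOCKSTAT_READER instead. */
        LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, &rw,
            0, 0, __FILE__, __LINE__, LOCKSTAT_READER);
        LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, &rw, LOCKSTAT_READER);
        return (0);
}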
*/ #ifndef LOCK_DEBUG #error LOCK_DEBUG not defined, include before #endif #if LOCK_DEBUG > 0 || defined(RWLOCK_NOINLINE) #define rw_wlock(rw) _rw_wlock((rw), LOCK_FILE, LOCK_LINE) #define rw_wunlock(rw) _rw_wunlock((rw), LOCK_FILE, LOCK_LINE) #else #define rw_wlock(rw) \ __rw_wlock((rw), curthread, LOCK_FILE, LOCK_LINE) #define rw_wunlock(rw) \ __rw_wunlock((rw), curthread, LOCK_FILE, LOCK_LINE) #endif #define rw_rlock(rw) _rw_rlock((rw), LOCK_FILE, LOCK_LINE) #define rw_runlock(rw) _rw_runlock((rw), LOCK_FILE, LOCK_LINE) #define rw_try_rlock(rw) _rw_try_rlock((rw), LOCK_FILE, LOCK_LINE) #define rw_try_upgrade(rw) _rw_try_upgrade((rw), LOCK_FILE, LOCK_LINE) #define rw_try_wlock(rw) _rw_try_wlock((rw), LOCK_FILE, LOCK_LINE) #define rw_downgrade(rw) _rw_downgrade((rw), LOCK_FILE, LOCK_LINE) #define rw_unlock(rw) do { \ if (rw_wowned(rw)) \ rw_wunlock(rw); \ else \ rw_runlock(rw); \ } while (0) #define rw_sleep(chan, rw, pri, wmesg, timo) \ _sleep((chan), &(rw)->lock_object, (pri), (wmesg), \ tick_sbt * (timo), 0, C_HARDCLOCK) #define rw_initialized(rw) lock_initialized(&(rw)->lock_object) struct rw_args { void *ra_rw; const char *ra_desc; }; struct rw_args_flags { void *ra_rw; const char *ra_desc; int ra_flags; }; #define RW_SYSINIT(name, rw, desc) \ static struct rw_args name##_args = { \ (rw), \ (desc), \ }; \ SYSINIT(name##_rw_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ rw_sysinit, &name##_args); \ SYSUNINIT(name##_rw_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ _rw_destroy, __DEVOLATILE(void *, &(rw)->rw_lock)) #define RW_SYSINIT_FLAGS(name, rw, desc, flags) \ static struct rw_args_flags name##_args = { \ (rw), \ (desc), \ (flags), \ }; \ SYSINIT(name##_rw_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ rw_sysinit_flags, &name##_args); \ SYSUNINIT(name##_rw_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ _rw_destroy, __DEVOLATILE(void *, &(rw)->rw_lock)) /* * Options passed to rw_init_flags(). */ #define RW_DUPOK 0x01 #define RW_NOPROFILE 0x02 #define RW_NOWITNESS 0x04 #define RW_QUIET 0x08 #define RW_RECURSE 0x10 #define RW_NEW 0x20 /* * The INVARIANTS-enabled rw_assert() functionality. * * The constants need to be defined for INVARIANT_SUPPORT infrastructure * support as _rw_assert() itself uses them and the latter implies that * _rw_assert() must build. */ #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) #define RA_LOCKED LA_LOCKED #define RA_RLOCKED LA_SLOCKED #define RA_WLOCKED LA_XLOCKED #define RA_UNLOCKED LA_UNLOCKED #define RA_RECURSED LA_RECURSED #define RA_NOTRECURSED LA_NOTRECURSED #endif #ifdef INVARIANTS #define rw_assert(rw, what) _rw_assert((rw), (what), LOCK_FILE, LOCK_LINE) #else #define rw_assert(rw, what) #endif #endif /* _KERNEL */ #endif /* !_SYS_RWLOCK_H_ */ Index: head/sys/sys/sx.h =================================================================== --- head/sys/sys/sx.h (revision 285703) +++ head/sys/sys/sx.h (revision 285704) @@ -1,321 +1,322 @@ /*- * Copyright (c) 2007 Attilio Rao * Copyright (c) 2001 Jason Evans * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. 
Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SX_H_ #define _SYS_SX_H_ #include #include #ifdef _KERNEL #include #include #include #include #endif /* * In general, the sx locks and rwlocks use very similar algorithms. * The main difference in the implementations is how threads are * blocked when a lock is unavailable. For this, sx locks use sleep * queues which do not support priority propagation, and rwlocks use * turnstiles which do. * * The sx_lock field consists of several fields. The low bit * indicates if the lock is locked with a shared or exclusive lock. A * value of 0 indicates an exclusive lock, and a value of 1 indicates * a shared lock. Bit 1 is a boolean indicating if there are any * threads waiting for a shared lock. Bit 2 is a boolean indicating * if there are any threads waiting for an exclusive lock. Bit 3 is a * boolean indicating if an exclusive lock is recursively held. The * rest of the variable's definition is dependent on the value of the * first bit. For an exclusive lock, it is a pointer to the thread * holding the lock, similar to the mtx_lock field of mutexes. For * shared locks, it is a count of read locks that are held. * * When the lock is not locked by any thread, it is encoded as a * shared lock with zero waiters. */ #define SX_LOCK_SHARED 0x01 #define SX_LOCK_SHARED_WAITERS 0x02 #define SX_LOCK_EXCLUSIVE_WAITERS 0x04 #define SX_LOCK_RECURSED 0x08 #define SX_LOCK_FLAGMASK \ (SX_LOCK_SHARED | SX_LOCK_SHARED_WAITERS | \ SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_RECURSED) #define SX_OWNER(x) ((x) & ~SX_LOCK_FLAGMASK) #define SX_SHARERS_SHIFT 4 #define SX_SHARERS(x) (SX_OWNER(x) >> SX_SHARERS_SHIFT) #define SX_SHARERS_LOCK(x) \ ((x) << SX_SHARERS_SHIFT | SX_LOCK_SHARED) #define SX_ONE_SHARER (1 << SX_SHARERS_SHIFT) #define SX_LOCK_UNLOCKED SX_SHARERS_LOCK(0) #define SX_LOCK_DESTROYED \ (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS) #ifdef _KERNEL #define sx_recurse lock_object.lo_data /* * Function prototipes. Routines that start with an underscore are not part * of the public interface and are wrappered with a macro. 
*/ void sx_sysinit(void *arg); #define sx_init(sx, desc) sx_init_flags((sx), (desc), 0) void sx_init_flags(struct sx *sx, const char *description, int opts); void sx_destroy(struct sx *sx); int sx_try_slock_(struct sx *sx, const char *file, int line); int sx_try_xlock_(struct sx *sx, const char *file, int line); int sx_try_upgrade_(struct sx *sx, const char *file, int line); void sx_downgrade_(struct sx *sx, const char *file, int line); int _sx_slock(struct sx *sx, int opts, const char *file, int line); int _sx_xlock(struct sx *sx, int opts, const char *file, int line); void _sx_sunlock(struct sx *sx, const char *file, int line); void _sx_xunlock(struct sx *sx, const char *file, int line); int _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, int line); int _sx_slock_hard(struct sx *sx, int opts, const char *file, int line); void _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line); void _sx_sunlock_hard(struct sx *sx, const char *file, int line); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void _sx_assert(const struct sx *sx, int what, const char *file, int line); #endif #ifdef DDB int sx_chain(struct thread *td, struct thread **ownerp); #endif struct sx_args { struct sx *sa_sx; const char *sa_desc; int sa_flags; }; #define SX_SYSINIT_FLAGS(name, sxa, desc, flags) \ static struct sx_args name##_args = { \ (sxa), \ (desc), \ (flags) \ }; \ SYSINIT(name##_sx_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ sx_sysinit, &name##_args); \ SYSUNINIT(name##_sx_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ sx_destroy, (sxa)) #define SX_SYSINIT(name, sxa, desc) SX_SYSINIT_FLAGS(name, sxa, desc, 0) /* * Full lock operations that are suitable to be inlined in non-debug kernels. * If the lock can't be acquired or released trivially then the work is * deferred to 'tougher' functions. */ /* Acquire an exclusive lock. */ static __inline int __sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file, int line) { uintptr_t tid = (uintptr_t)td; int error = 0; if (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) error = _sx_xlock_hard(sx, tid, opts, file, line); else - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(sx__acquire, sx, - 0, 0, file, line); + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, + 0, 0, file, line, LOCKSTAT_WRITER); return (error); } /* Release an exclusive lock. */ static __inline void __sx_xunlock(struct sx *sx, struct thread *td, const char *file, int line) { uintptr_t tid = (uintptr_t)td; if (sx->sx_recurse == 0) - LOCKSTAT_PROFILE_RELEASE_LOCK(sx__release, sx); + LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, + LOCKSTAT_WRITER); if (!atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED)) _sx_xunlock_hard(sx, tid, file, line); } /* Acquire a shared lock. */ static __inline int __sx_slock(struct sx *sx, int opts, const char *file, int line) { uintptr_t x = sx->sx_lock; int error = 0; if (!(x & SX_LOCK_SHARED) || !atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) error = _sx_slock_hard(sx, opts, file, line); else - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(sx__acquire, sx, - 0, 0, file, line); + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, + 0, 0, file, line, LOCKSTAT_READER); return (error); } /* * Release a shared lock. We can just drop a single shared lock so * long as we aren't trying to drop the last shared lock when other * threads are waiting for an exclusive lock. This takes advantage of * the fact that an unlocked lock is encoded as a shared lock with a * count of 0. 
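The shared fast paths above work because the sharer count lives above the flag bits, so a single compare-and-swap of the whole word both validates the flags and adjusts the count. The sketch below is not part of this change: it copies the SX_* constants from this header and uses C11 atomics in place of the kernel's atomic_cmpset_acq_ptr()/atomic_cmpset_rel_ptr() to walk through that arithmetic. The same reasoning applies to the rw_lock word.

/*
 * Not part of this change: illustration of the sx shared-lock fast-path
 * arithmetic used by __sx_slock()/__sx_sunlock() above.  SX_* constants
 * are copied from sys/sys/sx.h; C11 atomics stand in for the kernel's
 * atomic_cmpset_acq_ptr()/atomic_cmpset_rel_ptr().
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SX_LOCK_SHARED                  0x01
#define SX_LOCK_SHARED_WAITERS          0x02
#define SX_LOCK_EXCLUSIVE_WAITERS       0x04
#define SX_LOCK_RECURSED                0x08
#define SX_LOCK_FLAGMASK \
        (SX_LOCK_SHARED | SX_LOCK_SHARED_WAITERS | \
        SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_RECURSED)
#define SX_OWNER(x)                     ((x) & ~SX_LOCK_FLAGMASK)
#define SX_SHARERS_SHIFT                4
#define SX_SHARERS(x)                   (SX_OWNER(x) >> SX_SHARERS_SHIFT)
#define SX_SHARERS_LOCK(x) \
        ((x) << SX_SHARERS_SHIFT | SX_LOCK_SHARED)
#define SX_ONE_SHARER                   (1 << SX_SHARERS_SHIFT)
#define SX_LOCK_UNLOCKED                SX_SHARERS_LOCK(0)

static _Atomic uintptr_t sx_lock = SX_LOCK_UNLOCKED;

/* Single-attempt shared acquire, mirroring the __sx_slock() fast path. */
static bool
slock_fast(void)
{
        uintptr_t x = atomic_load(&sx_lock);

        if (!(x & SX_LOCK_SHARED))
                return (false); /* exclusively held: take the hard path */
        return (atomic_compare_exchange_strong(&sx_lock, &x,
            x + SX_ONE_SHARER));
}

/* Single-attempt shared release, mirroring the __sx_sunlock() fast path. */
static bool
sunlock_fast(void)
{
        uintptr_t x = atomic_load(&sx_lock);

        if (x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS))
                return (false); /* last sharer with writers waiting */
        return (atomic_compare_exchange_strong(&sx_lock, &x,
            x - SX_ONE_SHARER));
}

int
main(void)
{
        assert(SX_SHARERS(SX_LOCK_UNLOCKED) == 0);

        assert(slock_fast() && SX_SHARERS(atomic_load(&sx_lock)) == 1);
        assert(slock_fast() && SX_SHARERS(atomic_load(&sx_lock)) == 2);

        /* Adding sharers never disturbs the flag bits. */
        assert((atomic_load(&sx_lock) & SX_LOCK_FLAGMASK) == SX_LOCK_SHARED);

        assert(sunlock_fast() && sunlock_fast());
        assert(atomic_load(&sx_lock) == SX_LOCK_UNLOCKED);

        printf("sx shared fast-path checks passed\n");
        return (0);
}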
*/ static __inline void __sx_sunlock(struct sx *sx, const char *file, int line) { uintptr_t x = sx->sx_lock; - LOCKSTAT_PROFILE_RELEASE_LOCK(sx__release, sx); + LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER); if (x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS) || !atomic_cmpset_rel_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER)) _sx_sunlock_hard(sx, file, line); } /* * Public interface for lock operations. */ #ifndef LOCK_DEBUG #error "LOCK_DEBUG not defined, include before " #endif #if (LOCK_DEBUG > 0) || defined(SX_NOINLINE) #define sx_xlock_(sx, file, line) \ (void)_sx_xlock((sx), 0, (file), (line)) #define sx_xlock_sig_(sx, file, line) \ _sx_xlock((sx), SX_INTERRUPTIBLE, (file), (line)) #define sx_xunlock_(sx, file, line) \ _sx_xunlock((sx), (file), (line)) #define sx_slock_(sx, file, line) \ (void)_sx_slock((sx), 0, (file), (line)) #define sx_slock_sig_(sx, file, line) \ _sx_slock((sx), SX_INTERRUPTIBLE, (file) , (line)) #define sx_sunlock_(sx, file, line) \ _sx_sunlock((sx), (file), (line)) #else #define sx_xlock_(sx, file, line) \ (void)__sx_xlock((sx), curthread, 0, (file), (line)) #define sx_xlock_sig_(sx, file, line) \ __sx_xlock((sx), curthread, SX_INTERRUPTIBLE, (file), (line)) #define sx_xunlock_(sx, file, line) \ __sx_xunlock((sx), curthread, (file), (line)) #define sx_slock_(sx, file, line) \ (void)__sx_slock((sx), 0, (file), (line)) #define sx_slock_sig_(sx, file, line) \ __sx_slock((sx), SX_INTERRUPTIBLE, (file), (line)) #define sx_sunlock_(sx, file, line) \ __sx_sunlock((sx), (file), (line)) #endif /* LOCK_DEBUG > 0 || SX_NOINLINE */ #define sx_try_slock(sx) sx_try_slock_((sx), LOCK_FILE, LOCK_LINE) #define sx_try_xlock(sx) sx_try_xlock_((sx), LOCK_FILE, LOCK_LINE) #define sx_try_upgrade(sx) sx_try_upgrade_((sx), LOCK_FILE, LOCK_LINE) #define sx_downgrade(sx) sx_downgrade_((sx), LOCK_FILE, LOCK_LINE) #ifdef INVARIANTS #define sx_assert_(sx, what, file, line) \ _sx_assert((sx), (what), (file), (line)) #else #define sx_assert_(sx, what, file, line) (void)0 #endif #define sx_xlock(sx) sx_xlock_((sx), LOCK_FILE, LOCK_LINE) #define sx_xlock_sig(sx) sx_xlock_sig_((sx), LOCK_FILE, LOCK_LINE) #define sx_xunlock(sx) sx_xunlock_((sx), LOCK_FILE, LOCK_LINE) #define sx_slock(sx) sx_slock_((sx), LOCK_FILE, LOCK_LINE) #define sx_slock_sig(sx) sx_slock_sig_((sx), LOCK_FILE, LOCK_LINE) #define sx_sunlock(sx) sx_sunlock_((sx), LOCK_FILE, LOCK_LINE) #define sx_assert(sx, what) sx_assert_((sx), (what), __FILE__, __LINE__) /* * Return a pointer to the owning thread if the lock is exclusively * locked. */ #define sx_xholder(sx) \ ((sx)->sx_lock & SX_LOCK_SHARED ? NULL : \ (struct thread *)SX_OWNER((sx)->sx_lock)) #define sx_xlocked(sx) \ (((sx)->sx_lock & ~(SX_LOCK_FLAGMASK & ~SX_LOCK_SHARED)) == \ (uintptr_t)curthread) #define sx_unlock_(sx, file, line) do { \ if (sx_xlocked(sx)) \ sx_xunlock_(sx, file, line); \ else \ sx_sunlock_(sx, file, line); \ } while (0) #define sx_unlock(sx) sx_unlock_((sx), LOCK_FILE, LOCK_LINE) #define sx_sleep(chan, sx, pri, wmesg, timo) \ _sleep((chan), &(sx)->lock_object, (pri), (wmesg), \ tick_sbt * (timo), 0, C_HARDCLOCK) /* * Options passed to sx_init_flags(). */ #define SX_DUPOK 0x01 #define SX_NOPROFILE 0x02 #define SX_NOWITNESS 0x04 #define SX_QUIET 0x08 #define SX_NOADAPTIVE 0x10 #define SX_RECURSE 0x20 #define SX_NEW 0x40 /* * Options passed to sx_*lock_hard(). 
*/ #define SX_INTERRUPTIBLE 0x40 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) #define SA_LOCKED LA_LOCKED #define SA_SLOCKED LA_SLOCKED #define SA_XLOCKED LA_XLOCKED #define SA_UNLOCKED LA_UNLOCKED #define SA_RECURSED LA_RECURSED #define SA_NOTRECURSED LA_NOTRECURSED /* Backwards compatibility. */ #define SX_LOCKED LA_LOCKED #define SX_SLOCKED LA_SLOCKED #define SX_XLOCKED LA_XLOCKED #define SX_UNLOCKED LA_UNLOCKED #define SX_RECURSED LA_RECURSED #define SX_NOTRECURSED LA_NOTRECURSED #endif #endif /* _KERNEL */ #endif /* !_SYS_SX_H_ */
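For reference, a minimal sketch of how the sx interface declared above is typically used from kernel code. It assumes a kernel build environment; struct example_softc and its fields are hypothetical, while the sx_*() calls and SA_XLOCKED are the ones defined in this header.

/*
 * Illustrative in-kernel usage of the sx interface declared above.
 * "example_softc" and its fields are hypothetical; the sx calls and the
 * assertion constant are those defined in sys/sys/sx.h.
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>

struct example_softc {
        struct sx       es_lock;        /* protects es_count */
        int             es_count;
};

static void
example_init(struct example_softc *sc)
{
        sx_init(&sc->es_lock, "example softc lock");
}

static int
example_read(struct example_softc *sc)
{
        int v;

        sx_slock(&sc->es_lock);         /* shared: fast path records LOCKSTAT_READER */
        v = sc->es_count;
        sx_sunlock(&sc->es_lock);
        return (v);
}

static void
example_update(struct example_softc *sc)
{
        sx_xlock(&sc->es_lock);         /* exclusive: fast path records LOCKSTAT_WRITER */
        sx_assert(&sc->es_lock, SA_XLOCKED);
        sc->es_count++;
        sx_xunlock(&sc->es_lock);
}

static void
example_fini(struct example_softc *sc)
{
        sx_destroy(&sc->es_lock);
}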