Index: head/sys/kern/subr_prf.c =================================================================== --- head/sys/kern/subr_prf.c (revision 333828) +++ head/sys/kern/subr_prf.c (revision 333829) @@ -1,1262 +1,1263 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)subr_prf.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #ifdef _KERNEL #include "opt_ddb.h" #include "opt_printf.h" #endif /* _KERNEL */ #include #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif #include #include #ifdef DDB #include #endif /* * Note that stdarg.h and the ANSI style va_start macro is used for both * ANSI and traditional C compilers. */ #ifdef _KERNEL #include #else #include #endif /* * This is needed for sbuf_putbuf() when compiled into userland. Due to the * shared nature of this file, it's the only place to put it. */ #ifndef _KERNEL #include #endif #ifdef _KERNEL #define TOCONS 0x01 #define TOTTY 0x02 #define TOLOG 0x04 /* Max number conversion buffer length: a u_quad_t in base 2, plus NUL byte. */ #define MAXNBUF (sizeof(intmax_t) * NBBY + 1) struct putchar_arg { int flags; int pri; struct tty *tty; char *p_bufr; size_t n_bufr; char *p_next; size_t remain; }; struct snprintf_arg { char *str; size_t remain; }; extern int log_open; static void msglogchar(int c, int pri); static void msglogstr(char *str, int pri, int filter_cr); static void putchar(int ch, void *arg); static char *ksprintn(char *nbuf, uintmax_t num, int base, int *len, int upper); static void snprintf_func(int ch, void *arg); static int msgbufmapped; /* Set when safe to use msgbuf */ int msgbuftrigger; struct msgbuf *msgbufp; static int log_console_output = 1; SYSCTL_INT(_kern, OID_AUTO, log_console_output, CTLFLAG_RWTUN, &log_console_output, 0, "Duplicate console output to the syslog"); /* * See the comment in log_console() below for more explanation of this. */ static int log_console_add_linefeed; SYSCTL_INT(_kern, OID_AUTO, log_console_add_linefeed, CTLFLAG_RWTUN, &log_console_add_linefeed, 0, "log_console() adds extra newlines"); static int always_console_output; SYSCTL_INT(_kern, OID_AUTO, always_console_output, CTLFLAG_RWTUN, &always_console_output, 0, "Always output to console despite TIOCCONS"); /* * Warn that a system table is full. */ void tablefull(const char *tab) { log(LOG_ERR, "%s: table is full\n", tab); } /* * Uprintf prints to the controlling terminal for the current process. */ int uprintf(const char *fmt, ...) { va_list ap; struct putchar_arg pca; struct proc *p; struct thread *td; int retval; td = curthread; if (TD_IS_IDLETHREAD(td)) return (0); sx_slock(&proctree_lock); p = td->td_proc; PROC_LOCK(p); if ((p->p_flag & P_CONTROLT) == 0) { PROC_UNLOCK(p); sx_sunlock(&proctree_lock); return (0); } SESS_LOCK(p->p_session); pca.tty = p->p_session->s_ttyp; SESS_UNLOCK(p->p_session); PROC_UNLOCK(p); if (pca.tty == NULL) { sx_sunlock(&proctree_lock); return (0); } pca.flags = TOTTY; pca.p_bufr = NULL; va_start(ap, fmt); tty_lock(pca.tty); sx_sunlock(&proctree_lock); retval = kvprintf(fmt, putchar, &pca, 10, ap); tty_unlock(pca.tty); va_end(ap); return (retval); } /* * tprintf and vtprintf print on the controlling terminal associated with the * given session, possibly to the log as well. */ void tprintf(struct proc *p, int pri, const char *fmt, ...) { va_list ap; va_start(ap, fmt); vtprintf(p, pri, fmt, ap); va_end(ap); } void vtprintf(struct proc *p, int pri, const char *fmt, va_list ap) { struct tty *tp = NULL; int flags = 0; struct putchar_arg pca; struct session *sess = NULL; sx_slock(&proctree_lock); if (pri != -1) flags |= TOLOG; if (p != NULL) { PROC_LOCK(p); if (p->p_flag & P_CONTROLT && p->p_session->s_ttyvp) { sess = p->p_session; sess_hold(sess); PROC_UNLOCK(p); tp = sess->s_ttyp; if (tp != NULL && tty_checkoutq(tp)) flags |= TOTTY; else tp = NULL; } else PROC_UNLOCK(p); } pca.pri = pri; pca.tty = tp; pca.flags = flags; pca.p_bufr = NULL; if (pca.tty != NULL) tty_lock(pca.tty); sx_sunlock(&proctree_lock); kvprintf(fmt, putchar, &pca, 10, ap); if (pca.tty != NULL) tty_unlock(pca.tty); if (sess != NULL) sess_release(sess); msgbuftrigger = 1; } /* * Ttyprintf displays a message on a tty; it should be used only by * the tty driver, or anything that knows the underlying tty will not * be revoke(2)'d away. Other callers should use tprintf. */ int ttyprintf(struct tty *tp, const char *fmt, ...) { va_list ap; struct putchar_arg pca; int retval; va_start(ap, fmt); pca.tty = tp; pca.flags = TOTTY; pca.p_bufr = NULL; retval = kvprintf(fmt, putchar, &pca, 10, ap); va_end(ap); return (retval); } static int _vprintf(int level, int flags, const char *fmt, va_list ap) { struct putchar_arg pca; int retval; #ifdef PRINTF_BUFR_SIZE char bufr[PRINTF_BUFR_SIZE]; #endif TSENTER(); pca.tty = NULL; pca.pri = level; pca.flags = flags; #ifdef PRINTF_BUFR_SIZE pca.p_bufr = bufr; pca.p_next = pca.p_bufr; pca.n_bufr = sizeof(bufr); pca.remain = sizeof(bufr); *pca.p_next = '\0'; #else /* Don't buffer console output. */ pca.p_bufr = NULL; #endif retval = kvprintf(fmt, putchar, &pca, 10, ap); #ifdef PRINTF_BUFR_SIZE /* Write any buffered console/log output: */ if (*pca.p_bufr != '\0') { if (pca.flags & TOLOG) msglogstr(pca.p_bufr, level, /*filter_cr*/1); if (pca.flags & TOCONS) cnputs(pca.p_bufr); } #endif TSEXIT(); return (retval); } /* * Log writes to the log buffer, and guarantees not to sleep (so can be * called by interrupt routines). If there is no process reading the * log yet, it writes to the console also. */ void log(int level, const char *fmt, ...) { va_list ap; va_start(ap, fmt); vlog(level, fmt, ap); va_end(ap); } void vlog(int level, const char *fmt, va_list ap) { (void)_vprintf(level, log_open ? TOLOG : TOCONS | TOLOG, fmt, ap); msgbuftrigger = 1; } #define CONSCHUNK 128 void log_console(struct uio *uio) { int c, error, nl; char *consbuffer; int pri; if (!log_console_output) return; pri = LOG_INFO | LOG_CONSOLE; uio = cloneuio(uio); consbuffer = malloc(CONSCHUNK, M_TEMP, M_WAITOK); nl = 0; while (uio->uio_resid > 0) { c = imin(uio->uio_resid, CONSCHUNK - 1); error = uiomove(consbuffer, c, uio); if (error != 0) break; /* Make sure we're NUL-terminated */ consbuffer[c] = '\0'; if (consbuffer[c - 1] == '\n') nl = 1; else nl = 0; msglogstr(consbuffer, pri, /*filter_cr*/ 1); } /* * The previous behavior in log_console() is preserved when * log_console_add_linefeed is non-zero. For that behavior, if an * individual console write came in that was not terminated with a * line feed, it would add a line feed. * * This results in different data in the message buffer than * appears on the system console (which doesn't add extra line feed * characters). * * A number of programs and rc scripts write a line feed, or a period * and a line feed when they have completed their operation. On * the console, this looks seamless, but when displayed with * 'dmesg -a', you wind up with output that looks like this: * * Updating motd: * . * * On the console, it looks like this: * Updating motd:. * * We could add logic to detect that situation, or just not insert * the extra newlines. Set the kern.log_console_add_linefeed * sysctl/tunable variable to get the old behavior. */ if (!nl && log_console_add_linefeed) { consbuffer[0] = '\n'; consbuffer[1] = '\0'; msglogstr(consbuffer, pri, /*filter_cr*/ 1); } msgbuftrigger = 1; free(uio, M_IOV); free(consbuffer, M_TEMP); } int printf(const char *fmt, ...) { va_list ap; int retval; va_start(ap, fmt); retval = vprintf(fmt, ap); va_end(ap); return (retval); } int vprintf(const char *fmt, va_list ap) { int retval; retval = _vprintf(-1, TOCONS | TOLOG, fmt, ap); if (!panicstr) msgbuftrigger = 1; return (retval); } static void prf_putbuf(char *bufr, int flags, int pri) { if (flags & TOLOG) msglogstr(bufr, pri, /*filter_cr*/1); if (flags & TOCONS) { if ((panicstr == NULL) && (constty != NULL)) msgbuf_addstr(&consmsgbuf, -1, bufr, /*filter_cr*/ 0); if ((constty == NULL) ||(always_console_output)) cnputs(bufr); } } static void putbuf(int c, struct putchar_arg *ap) { /* Check if no console output buffer was provided. */ if (ap->p_bufr == NULL) { /* Output direct to the console. */ if (ap->flags & TOCONS) cnputc(c); if (ap->flags & TOLOG) msglogchar(c, ap->pri); } else { /* Buffer the character: */ *ap->p_next++ = c; ap->remain--; /* Always leave the buffer zero terminated. */ *ap->p_next = '\0'; /* Check if the buffer needs to be flushed. */ if (ap->remain == 2 || c == '\n') { prf_putbuf(ap->p_bufr, ap->flags, ap->pri); ap->p_next = ap->p_bufr; ap->remain = ap->n_bufr; *ap->p_next = '\0'; } /* * Since we fill the buffer up one character at a time, * this should not happen. We should always catch it when * ap->remain == 2 (if not sooner due to a newline), flush * the buffer and move on. One way this could happen is * if someone sets PRINTF_BUFR_SIZE to 1 or something * similarly silly. */ KASSERT(ap->remain > 2, ("Bad buffer logic, remain = %zd", ap->remain)); } } /* * Print a character on console or users terminal. If destination is * the console then the last bunch of characters are saved in msgbuf for * inspection later. */ static void putchar(int c, void *arg) { struct putchar_arg *ap = (struct putchar_arg*) arg; struct tty *tp = ap->tty; int flags = ap->flags; /* Don't use the tty code after a panic or while in ddb. */ if (kdb_active) { if (c != '\0') cnputc(c); return; } if ((flags & TOTTY) && tp != NULL && panicstr == NULL) tty_putchar(tp, c); if ((flags & (TOCONS | TOLOG)) && c != '\0') putbuf(c, ap); } /* * Scaled down version of sprintf(3). */ int sprintf(char *buf, const char *cfmt, ...) { int retval; va_list ap; va_start(ap, cfmt); retval = kvprintf(cfmt, NULL, (void *)buf, 10, ap); buf[retval] = '\0'; va_end(ap); return (retval); } /* * Scaled down version of vsprintf(3). */ int vsprintf(char *buf, const char *cfmt, va_list ap) { int retval; retval = kvprintf(cfmt, NULL, (void *)buf, 10, ap); buf[retval] = '\0'; return (retval); } /* * Scaled down version of snprintf(3). */ int snprintf(char *str, size_t size, const char *format, ...) { int retval; va_list ap; va_start(ap, format); retval = vsnprintf(str, size, format, ap); va_end(ap); return(retval); } /* * Scaled down version of vsnprintf(3). */ int vsnprintf(char *str, size_t size, const char *format, va_list ap) { struct snprintf_arg info; int retval; info.str = str; info.remain = size; retval = kvprintf(format, snprintf_func, &info, 10, ap); if (info.remain >= 1) *info.str++ = '\0'; return (retval); } /* * Kernel version which takes radix argument vsnprintf(3). */ int vsnrprintf(char *str, size_t size, int radix, const char *format, va_list ap) { struct snprintf_arg info; int retval; info.str = str; info.remain = size; retval = kvprintf(format, snprintf_func, &info, radix, ap); if (info.remain >= 1) *info.str++ = '\0'; return (retval); } static void snprintf_func(int ch, void *arg) { struct snprintf_arg *const info = arg; if (info->remain >= 2) { *info->str++ = ch; info->remain--; } } /* * Put a NUL-terminated ASCII number (base <= 36) in a buffer in reverse * order; return an optional length and a pointer to the last character * written in the buffer (i.e., the first character of the string). * The buffer pointed to by `nbuf' must have length >= MAXNBUF. */ static char * ksprintn(char *nbuf, uintmax_t num, int base, int *lenp, int upper) { char *p, c; p = nbuf; *p = '\0'; do { c = hex2ascii(num % base); *++p = upper ? toupper(c) : c; } while (num /= base); if (lenp) *lenp = p - nbuf; return (p); } /* * Scaled down version of printf(3). * * Two additional formats: * * The format %b is supported to decode error registers. * Its usage is: * * printf("reg=%b\n", regval, "*"); * * where is the output base expressed as a control character, e.g. * \10 gives octal; \20 gives hex. Each arg is a sequence of characters, * the first of which gives the bit number to be inspected (origin 1), and * the next characters (up to a control character, i.e. a character <= 32), * give the name of the register. Thus: * * kvprintf("reg=%b\n", 3, "\10\2BITTWO\1BITONE"); * * would produce output: * * reg=3 * * XXX: %D -- Hexdump, takes pointer and separator string: * ("%6D", ptr, ":") -> XX:XX:XX:XX:XX:XX * ("%*D", len, ptr, " " -> XX XX XX XX ... */ int kvprintf(char const *fmt, void (*func)(int, void*), void *arg, int radix, va_list ap) { #define PCHAR(c) {int cc=(c); if (func) (*func)(cc,arg); else *d++ = cc; retval++; } char nbuf[MAXNBUF]; char *d; const char *p, *percent, *q; u_char *up; int ch, n; uintmax_t num; int base, lflag, qflag, tmp, width, ladjust, sharpflag, neg, sign, dot; int cflag, hflag, jflag, tflag, zflag; int bconv, dwidth, upper; char padc; int stop = 0, retval = 0; num = 0; + q = NULL; if (!func) d = (char *) arg; else d = NULL; if (fmt == NULL) fmt = "(fmt null)\n"; if (radix < 2 || radix > 36) radix = 10; for (;;) { padc = ' '; width = 0; while ((ch = (u_char)*fmt++) != '%' || stop) { if (ch == '\0') return (retval); PCHAR(ch); } percent = fmt - 1; qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0; sign = 0; dot = 0; bconv = 0; dwidth = 0; upper = 0; cflag = 0; hflag = 0; jflag = 0; tflag = 0; zflag = 0; reswitch: switch (ch = (u_char)*fmt++) { case '.': dot = 1; goto reswitch; case '#': sharpflag = 1; goto reswitch; case '+': sign = 1; goto reswitch; case '-': ladjust = 1; goto reswitch; case '%': PCHAR(ch); break; case '*': if (!dot) { width = va_arg(ap, int); if (width < 0) { ladjust = !ladjust; width = -width; } } else { dwidth = va_arg(ap, int); } goto reswitch; case '0': if (!dot) { padc = '0'; goto reswitch; } case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': for (n = 0;; ++fmt) { n = n * 10 + ch - '0'; ch = *fmt; if (ch < '0' || ch > '9') break; } if (dot) dwidth = n; else width = n; goto reswitch; case 'b': ladjust = 1; bconv = 1; goto handle_nosign; case 'c': width -= 1; if (!ladjust && width > 0) while (width--) PCHAR(padc); PCHAR(va_arg(ap, int)); if (ladjust && width > 0) while (width--) PCHAR(padc); break; case 'D': up = va_arg(ap, u_char *); p = va_arg(ap, char *); if (!width) width = 16; while(width--) { PCHAR(hex2ascii(*up >> 4)); PCHAR(hex2ascii(*up & 0x0f)); up++; if (width) for (q=p;*q;q++) PCHAR(*q); } break; case 'd': case 'i': base = 10; sign = 1; goto handle_sign; case 'h': if (hflag) { hflag = 0; cflag = 1; } else hflag = 1; goto reswitch; case 'j': jflag = 1; goto reswitch; case 'l': if (lflag) { lflag = 0; qflag = 1; } else lflag = 1; goto reswitch; case 'n': if (jflag) *(va_arg(ap, intmax_t *)) = retval; else if (qflag) *(va_arg(ap, quad_t *)) = retval; else if (lflag) *(va_arg(ap, long *)) = retval; else if (zflag) *(va_arg(ap, size_t *)) = retval; else if (hflag) *(va_arg(ap, short *)) = retval; else if (cflag) *(va_arg(ap, char *)) = retval; else *(va_arg(ap, int *)) = retval; break; case 'o': base = 8; goto handle_nosign; case 'p': base = 16; sharpflag = (width == 0); sign = 0; num = (uintptr_t)va_arg(ap, void *); goto number; case 'q': qflag = 1; goto reswitch; case 'r': base = radix; if (sign) goto handle_sign; goto handle_nosign; case 's': p = va_arg(ap, char *); if (p == NULL) p = "(null)"; if (!dot) n = strlen (p); else for (n = 0; n < dwidth && p[n]; n++) continue; width -= n; if (!ladjust && width > 0) while (width--) PCHAR(padc); while (n--) PCHAR(*p++); if (ladjust && width > 0) while (width--) PCHAR(padc); break; case 't': tflag = 1; goto reswitch; case 'u': base = 10; goto handle_nosign; case 'X': upper = 1; case 'x': base = 16; goto handle_nosign; case 'y': base = 16; sign = 1; goto handle_sign; case 'z': zflag = 1; goto reswitch; handle_nosign: sign = 0; if (jflag) num = va_arg(ap, uintmax_t); else if (qflag) num = va_arg(ap, u_quad_t); else if (tflag) num = va_arg(ap, ptrdiff_t); else if (lflag) num = va_arg(ap, u_long); else if (zflag) num = va_arg(ap, size_t); else if (hflag) num = (u_short)va_arg(ap, int); else if (cflag) num = (u_char)va_arg(ap, int); else num = va_arg(ap, u_int); if (bconv) { q = va_arg(ap, char *); base = *q++; } goto number; handle_sign: if (jflag) num = va_arg(ap, intmax_t); else if (qflag) num = va_arg(ap, quad_t); else if (tflag) num = va_arg(ap, ptrdiff_t); else if (lflag) num = va_arg(ap, long); else if (zflag) num = va_arg(ap, ssize_t); else if (hflag) num = (short)va_arg(ap, int); else if (cflag) num = (char)va_arg(ap, int); else num = va_arg(ap, int); number: if (sign && (intmax_t)num < 0) { neg = 1; num = -(intmax_t)num; } p = ksprintn(nbuf, num, base, &n, upper); tmp = 0; if (sharpflag && num != 0) { if (base == 8) tmp++; else if (base == 16) tmp += 2; } if (neg) tmp++; if (!ladjust && padc == '0') dwidth = width - tmp; width -= tmp + imax(dwidth, n); dwidth -= n; if (!ladjust) while (width-- > 0) PCHAR(' '); if (neg) PCHAR('-'); if (sharpflag && num != 0) { if (base == 8) { PCHAR('0'); } else if (base == 16) { PCHAR('0'); PCHAR('x'); } } while (dwidth-- > 0) PCHAR('0'); while (*p) PCHAR(*p--); if (bconv && num != 0) { /* %b conversion flag format. */ tmp = retval; while (*q) { n = *q++; if (num & (1 << (n - 1))) { PCHAR(retval != tmp ? ',' : '<'); for (; (n = *q) > ' '; ++q) PCHAR(n); } else for (; *q > ' '; ++q) continue; } if (retval != tmp) { PCHAR('>'); width -= retval - tmp; } } if (ladjust) while (width-- > 0) PCHAR(' '); break; default: while (percent < fmt) PCHAR(*percent++); /* * Since we ignore a formatting argument it is no * longer safe to obey the remaining formatting * arguments as the arguments will no longer match * the format specs. */ stop = 1; break; } } #undef PCHAR } /* * Put character in log buffer with a particular priority. */ static void msglogchar(int c, int pri) { static int lastpri = -1; static int dangling; char nbuf[MAXNBUF]; char *p; if (!msgbufmapped) return; if (c == '\0' || c == '\r') return; if (pri != -1 && pri != lastpri) { if (dangling) { msgbuf_addchar(msgbufp, '\n'); dangling = 0; } msgbuf_addchar(msgbufp, '<'); for (p = ksprintn(nbuf, (uintmax_t)pri, 10, NULL, 0); *p;) msgbuf_addchar(msgbufp, *p--); msgbuf_addchar(msgbufp, '>'); lastpri = pri; } msgbuf_addchar(msgbufp, c); if (c == '\n') { dangling = 0; lastpri = -1; } else { dangling = 1; } } static void msglogstr(char *str, int pri, int filter_cr) { if (!msgbufmapped) return; msgbuf_addstr(msgbufp, pri, str, filter_cr); } void msgbufinit(void *ptr, int size) { char *cp; static struct msgbuf *oldp = NULL; size -= sizeof(*msgbufp); cp = (char *)ptr; msgbufp = (struct msgbuf *)(cp + size); msgbuf_reinit(msgbufp, cp, size); if (msgbufmapped && oldp != msgbufp) msgbuf_copy(oldp, msgbufp); msgbufmapped = 1; oldp = msgbufp; } static int unprivileged_read_msgbuf = 1; SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_read_msgbuf, CTLFLAG_RW, &unprivileged_read_msgbuf, 0, "Unprivileged processes may read the kernel message buffer"); /* Sysctls for accessing/clearing the msgbuf */ static int sysctl_kern_msgbuf(SYSCTL_HANDLER_ARGS) { char buf[128]; u_int seq; int error, len; if (!unprivileged_read_msgbuf) { error = priv_check(req->td, PRIV_MSGBUF); if (error) return (error); } /* Read the whole buffer, one chunk at a time. */ mtx_lock(&msgbuf_lock); msgbuf_peekbytes(msgbufp, NULL, 0, &seq); for (;;) { len = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); mtx_unlock(&msgbuf_lock); if (len == 0) return (SYSCTL_OUT(req, "", 1)); /* add nulterm */ error = sysctl_handle_opaque(oidp, buf, len, req); if (error) return (error); mtx_lock(&msgbuf_lock); } } SYSCTL_PROC(_kern, OID_AUTO, msgbuf, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_msgbuf, "A", "Contents of kernel message buffer"); static int msgbuf_clearflag; static int sysctl_kern_msgbuf_clear(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) { mtx_lock(&msgbuf_lock); msgbuf_clear(msgbufp); mtx_unlock(&msgbuf_lock); msgbuf_clearflag = 0; } return (error); } SYSCTL_PROC(_kern, OID_AUTO, msgbuf_clear, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE | CTLFLAG_MPSAFE, &msgbuf_clearflag, 0, sysctl_kern_msgbuf_clear, "I", "Clear kernel message buffer"); #ifdef DDB DB_SHOW_COMMAND(msgbuf, db_show_msgbuf) { int i, j; if (!msgbufmapped) { db_printf("msgbuf not mapped yet\n"); return; } db_printf("msgbufp = %p\n", msgbufp); db_printf("magic = %x, size = %d, r= %u, w = %u, ptr = %p, cksum= %u\n", msgbufp->msg_magic, msgbufp->msg_size, msgbufp->msg_rseq, msgbufp->msg_wseq, msgbufp->msg_ptr, msgbufp->msg_cksum); for (i = 0; i < msgbufp->msg_size && !db_pager_quit; i++) { j = MSGBUF_SEQ_TO_POS(msgbufp, i + msgbufp->msg_rseq); db_printf("%c", msgbufp->msg_ptr[j]); } db_printf("\n"); } #endif /* DDB */ void hexdump(const void *ptr, int length, const char *hdr, int flags) { int i, j, k; int cols; const unsigned char *cp; char delim; if ((flags & HD_DELIM_MASK) != 0) delim = (flags & HD_DELIM_MASK) >> 8; else delim = ' '; if ((flags & HD_COLUMN_MASK) != 0) cols = flags & HD_COLUMN_MASK; else cols = 16; cp = ptr; for (i = 0; i < length; i+= cols) { if (hdr != NULL) printf("%s", hdr); if ((flags & HD_OMIT_COUNT) == 0) printf("%04x ", i); if ((flags & HD_OMIT_HEX) == 0) { for (j = 0; j < cols; j++) { k = i + j; if (k < length) printf("%c%02x", delim, cp[k]); else printf(" "); } } if ((flags & HD_OMIT_CHARS) == 0) { printf(" |"); for (j = 0; j < cols; j++) { k = i + j; if (k >= length) printf(" "); else if (cp[k] >= ' ' && cp[k] <= '~') printf("%c", cp[k]); else printf("."); } printf("|"); } printf("\n"); } } #endif /* _KERNEL */ void sbuf_hexdump(struct sbuf *sb, const void *ptr, int length, const char *hdr, int flags) { int i, j, k; int cols; const unsigned char *cp; char delim; if ((flags & HD_DELIM_MASK) != 0) delim = (flags & HD_DELIM_MASK) >> 8; else delim = ' '; if ((flags & HD_COLUMN_MASK) != 0) cols = flags & HD_COLUMN_MASK; else cols = 16; cp = ptr; for (i = 0; i < length; i+= cols) { if (hdr != NULL) sbuf_printf(sb, "%s", hdr); if ((flags & HD_OMIT_COUNT) == 0) sbuf_printf(sb, "%04x ", i); if ((flags & HD_OMIT_HEX) == 0) { for (j = 0; j < cols; j++) { k = i + j; if (k < length) sbuf_printf(sb, "%c%02x", delim, cp[k]); else sbuf_printf(sb, " "); } } if ((flags & HD_OMIT_CHARS) == 0) { sbuf_printf(sb, " |"); for (j = 0; j < cols; j++) { k = i + j; if (k >= length) sbuf_printf(sb, " "); else if (cp[k] >= ' ' && cp[k] <= '~') sbuf_printf(sb, "%c", cp[k]); else sbuf_printf(sb, "."); } sbuf_printf(sb, "|"); } sbuf_printf(sb, "\n"); } } #ifdef _KERNEL void counted_warning(unsigned *counter, const char *msg) { struct thread *td; unsigned c; for (;;) { c = *counter; if (c == 0) break; if (atomic_cmpset_int(counter, c, c - 1)) { td = curthread; log(LOG_INFO, "pid %d (%s) %s%s\n", td->td_proc->p_pid, td->td_name, msg, c > 1 ? "" : " - not logging anymore"); break; } } } #endif #ifdef _KERNEL void sbuf_putbuf(struct sbuf *sb) { prf_putbuf(sbuf_data(sb), TOLOG | TOCONS, -1); } #else void sbuf_putbuf(struct sbuf *sb) { printf("%s", sbuf_data(sb)); } #endif Index: head/sys/kern/uipc_sockbuf.c =================================================================== --- head/sys/kern/uipc_sockbuf.c (revision 333828) +++ head/sys/kern/uipc_sockbuf.c (revision 333829) @@ -1,1415 +1,1416 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_param.h" #include #include /* for aio_swake proto */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Function pointer set by the AIO routines so that the socket buffer code * can call back into the AIO module if it is loaded. */ void (*aio_swake)(struct socket *, struct sockbuf *); /* * Primitive routines for operating on socket buffers */ u_long sb_max = SB_MAX; u_long sb_max_adj = (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ static struct mbuf *sbcut_internal(struct sockbuf *sb, int len); static void sbflush_internal(struct sockbuf *sb); /* * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY. */ static void sbm_clrprotoflags(struct mbuf *m, int flags) { int mask; mask = ~M_PROTOFLAGS; if (flags & PRUS_NOTREADY) mask |= M_NOTREADY; while (m) { m->m_flags &= mask; m = m->m_next; } } /* * Mark ready "count" mbufs starting with "m". */ int sbready(struct sockbuf *sb, struct mbuf *m, int count) { u_int blocker; SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb)); blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0; for (int i = 0; i < count; i++, m = m->m_next) { KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); m->m_flags &= ~(M_NOTREADY | blocker); if (blocker) sb->sb_acc += m->m_len; } if (!blocker) return (EINPROGRESS); /* This one was blocking all the queue. */ for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) { KASSERT(m->m_flags & M_BLOCKED, ("%s: m %p !M_BLOCKED", __func__, m)); m->m_flags &= ~M_BLOCKED; sb->sb_acc += m->m_len; } sb->sb_fnrdy = m; return (0); } /* * Adjust sockbuf state reflecting allocation of m. */ void sballoc(struct sockbuf *sb, struct mbuf *m) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_ccc += m->m_len; if (sb->sb_fnrdy == NULL) { if (m->m_flags & M_NOTREADY) sb->sb_fnrdy = m; else sb->sb_acc += m->m_len; } else m->m_flags |= M_BLOCKED; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl += m->m_len; sb->sb_mbcnt += MSIZE; sb->sb_mcnt += 1; if (m->m_flags & M_EXT) { sb->sb_mbcnt += m->m_ext.ext_size; sb->sb_ccnt += 1; } } /* * Adjust sockbuf state reflecting freeing of m. */ void sbfree(struct sockbuf *sb, struct mbuf *m) { #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */ SOCKBUF_LOCK_ASSERT(sb); #endif sb->sb_ccc -= m->m_len; if (!(m->m_flags & M_NOTAVAIL)) sb->sb_acc -= m->m_len; if (m == sb->sb_fnrdy) { struct mbuf *n; KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); n = m->m_next; while (n != NULL && !(n->m_flags & M_NOTREADY)) { n->m_flags &= ~M_BLOCKED; sb->sb_acc += n->m_len; n = n->m_next; } sb->sb_fnrdy = n; } if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl -= m->m_len; sb->sb_mbcnt -= MSIZE; sb->sb_mcnt -= 1; if (m->m_flags & M_EXT) { sb->sb_mbcnt -= m->m_ext.ext_size; sb->sb_ccnt -= 1; } if (sb->sb_sndptr == m) { sb->sb_sndptr = NULL; sb->sb_sndptroff = 0; } if (sb->sb_sndptroff != 0) sb->sb_sndptroff -= m->m_len; } /* * Socantsendmore indicates that no more data will be sent on the socket; it * would normally be applied to a socket when the user informs the system * that no more data is to be sent, by the protocol code (in case * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be * received, and will normally be applied to the socket by a protocol when it * detects that the peer will send no more data. Data queued for reading in * the socket may yet be read. */ void socantsendmore_locked(struct socket *so) { SOCKBUF_LOCK_ASSERT(&so->so_snd); so->so_snd.sb_state |= SBS_CANTSENDMORE; sowwakeup_locked(so); mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); } void socantsendmore(struct socket *so) { SOCKBUF_LOCK(&so->so_snd); socantsendmore_locked(so); mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); } void socantrcvmore_locked(struct socket *so) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); so->so_rcv.sb_state |= SBS_CANTRCVMORE; sorwakeup_locked(so); mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); } void socantrcvmore(struct socket *so) { SOCKBUF_LOCK(&so->so_rcv); socantrcvmore_locked(so); mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); } /* * Wait for data to arrive at/drain from a socket buffer. */ int sbwait(struct sockbuf *sb) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_flags |= SB_WAIT; return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", sb->sb_timeo, 0, 0)); } int sblock(struct sockbuf *sb, int flags) { KASSERT((flags & SBL_VALID) == flags, ("sblock: flags invalid (0x%x)", flags)); if (flags & SBL_WAIT) { if ((sb->sb_flags & SB_NOINTR) || (flags & SBL_NOINTR)) { sx_xlock(&sb->sb_sx); return (0); } return (sx_xlock_sig(&sb->sb_sx)); } else { if (sx_try_xlock(&sb->sb_sx) == 0) return (EWOULDBLOCK); return (0); } } void sbunlock(struct sockbuf *sb) { sx_xunlock(&sb->sb_sx); } /* * Wakeup processes waiting on a socket buffer. Do asynchronous notification * via SIGIO if the socket has the SS_ASYNC flag set. * * Called with the socket buffer lock held; will release the lock by the end * of the function. This allows the caller to acquire the socket buffer lock * while testing for the need for various sorts of wakeup and hold it through * to the point where it's no longer required. We currently hold the lock * through calls out to other subsystems (with the exception of kqueue), and * then release it to avoid lock order issues. It's not clear that's * correct. */ void sowakeup(struct socket *so, struct sockbuf *sb) { int ret; SOCKBUF_LOCK_ASSERT(sb); selwakeuppri(sb->sb_sel, PSOCK); if (!SEL_WAITING(sb->sb_sel)) sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup(&sb->sb_acc); } KNOTE_LOCKED(&sb->sb_sel->si_note, 0); if (sb->sb_upcall != NULL) { ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT); if (ret == SU_ISCONNECTED) { KASSERT(sb == &so->so_rcv, ("SO_SND upcall returned SU_ISCONNECTED")); soupcall_clear(so, SO_RCV); } } else ret = SU_OK; if (sb->sb_flags & SB_AIO) sowakeup_aio(so, sb); SOCKBUF_UNLOCK(sb); if (ret == SU_ISCONNECTED) soisconnected(so); if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGIO, 0); mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); } /* * Socket buffer (struct sockbuf) utility routines. * * Each socket contains two socket buffers: one for sending data and one for * receiving data. Each buffer contains a queue of mbufs, information about * the number of mbufs and amount of data in the queue, and other fields * allowing select() statements and notification on data availability to be * implemented. * * Data stored in a socket buffer is maintained as a list of records. Each * record is a list of mbufs chained together with the m_next field. Records * are chained together with the m_nextpkt field. The upper level routine * soreceive() expects the following conventions to be observed when placing * information in the receive buffer: * * 1. If the protocol requires each message be preceded by the sender's name, * then a record containing that name must be present before any * associated data (mbuf's must be of type MT_SONAME). * 2. If the protocol supports the exchange of ``access rights'' (really just * additional data associated with the message), and there are ``rights'' * to be received, then a record containing this data should be present * (mbuf's must be of type MT_RIGHTS). * 3. If a name or rights record exists, then it must be followed by a data * record, perhaps of zero length. * * Before using a new socket structure it is first necessary to reserve * buffer space to the socket, by calling sbreserve(). This should commit * some of the available buffer space in the system buffer pool for the * socket (currently, it does nothing but enforce limits). The space should * be released by calling sbrelease() when the socket is destroyed. */ int soreserve(struct socket *so, u_long sndcc, u_long rcvcc) { struct thread *td = curthread; SOCKBUF_LOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0) goto bad; if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0) goto bad2; if (so->so_rcv.sb_lowat == 0) so->so_rcv.sb_lowat = 1; if (so->so_snd.sb_lowat == 0) so->so_snd.sb_lowat = MCLBYTES; if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_snd); return (0); bad2: sbrelease_locked(&so->so_snd, so); bad: SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_snd); return (ENOBUFS); } static int sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) { int error = 0; u_long tmp_sb_max = sb_max; error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req); if (error || !req->newptr) return (error); if (tmp_sb_max < MSIZE + MCLBYTES) return (EINVAL); sb_max = tmp_sb_max; sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); return (0); } /* * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't * become limiting if buffering efficiency is near the normal case. */ int sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, struct thread *td) { rlim_t sbsize_limit; SOCKBUF_LOCK_ASSERT(sb); /* * When a thread is passed, we take into account the thread's socket * buffer size limit. The caller will generally pass curthread, but * in the TCP input path, NULL will be passed to indicate that no * appropriate thread resource limits are available. In that case, * we don't apply a process limit. */ if (cc > sb_max_adj) return (0); if (td != NULL) { sbsize_limit = lim_cur(td, RLIMIT_SBSIZE); } else sbsize_limit = RLIM_INFINITY; if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, sbsize_limit)) return (0); sb->sb_mbmax = min(cc * sb_efficiency, sb_max); if (sb->sb_lowat > sb->sb_hiwat) sb->sb_lowat = sb->sb_hiwat; return (1); } int sbsetopt(struct socket *so, int cmd, u_long cc) { struct sockbuf *sb; short *flags; u_int *hiwat, *lowat; int error; + sb = NULL; SOCK_LOCK(so); if (SOLISTENING(so)) { switch (cmd) { case SO_SNDLOWAT: case SO_SNDBUF: lowat = &so->sol_sbsnd_lowat; hiwat = &so->sol_sbsnd_hiwat; flags = &so->sol_sbsnd_flags; break; case SO_RCVLOWAT: case SO_RCVBUF: lowat = &so->sol_sbrcv_lowat; hiwat = &so->sol_sbrcv_hiwat; flags = &so->sol_sbrcv_flags; break; } } else { switch (cmd) { case SO_SNDLOWAT: case SO_SNDBUF: sb = &so->so_snd; break; case SO_RCVLOWAT: case SO_RCVBUF: sb = &so->so_rcv; break; } flags = &sb->sb_flags; hiwat = &sb->sb_hiwat; lowat = &sb->sb_lowat; SOCKBUF_LOCK(sb); } error = 0; switch (cmd) { case SO_SNDBUF: case SO_RCVBUF: if (SOLISTENING(so)) { if (cc > sb_max_adj) { error = ENOBUFS; break; } *hiwat = cc; if (*lowat > *hiwat) *lowat = *hiwat; } else { if (!sbreserve_locked(sb, cc, so, curthread)) error = ENOBUFS; } if (error == 0) *flags &= ~SB_AUTOSIZE; break; case SO_SNDLOWAT: case SO_RCVLOWAT: /* * Make sure the low-water is never greater than the * high-water. */ *lowat = (cc > *hiwat) ? *hiwat : cc; break; } if (!SOLISTENING(so)) SOCKBUF_UNLOCK(sb); SOCK_UNLOCK(so); return (error); } /* * Free mbufs held by a socket, and reserved mbuf space. */ void sbrelease_internal(struct sockbuf *sb, struct socket *so) { sbflush_internal(sb); (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY); sb->sb_mbmax = 0; } void sbrelease_locked(struct sockbuf *sb, struct socket *so) { SOCKBUF_LOCK_ASSERT(sb); sbrelease_internal(sb, so); } void sbrelease(struct sockbuf *sb, struct socket *so) { SOCKBUF_LOCK(sb); sbrelease_locked(sb, so); SOCKBUF_UNLOCK(sb); } void sbdestroy(struct sockbuf *sb, struct socket *so) { sbrelease_internal(sb, so); } /* * Routines to add and remove data from an mbuf queue. * * The routines sbappend() or sbappendrecord() are normally called to append * new mbufs to a socket buffer, after checking that adequate space is * available, comparing the function sbspace() with the amount of data to be * added. sbappendrecord() differs from sbappend() in that data supplied is * treated as the beginning of a new record. To place a sender's address, * optional access rights, and data in a socket receive buffer, * sbappendaddr() should be used. To place access rights and data in a * socket receive buffer, sbappendrights() should be used. In either case, * the new data begins a new record. Note that unlike sbappend() and * sbappendrecord(), these routines check for the caller that there will be * enough space to store the data. Each fails if there is not enough space, * or if it cannot find mbufs to store additional information in. * * Reliable protocols may use the socket send buffer to hold data awaiting * acknowledgement. Data is normally copied from a socket send buffer in a * protocol with m_copy for output to a peer, and then removing the data from * the socket buffer with sbdrop() or sbdroprecord() when the data is * acknowledged by the peer. */ #ifdef SOCKBUF_DEBUG void sblastrecordchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; SOCKBUF_LOCK_ASSERT(sb); while (m && m->m_nextpkt) m = m->m_nextpkt; if (m != sb->sb_lastrecord) { printf("%s: sb_mb %p sb_lastrecord %p last %p\n", __func__, sb->sb_mb, sb->sb_lastrecord, m); printf("packet chain:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) printf("\t%p\n", m); panic("%s from %s:%u", __func__, file, line); } } void sblastmbufchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; struct mbuf *n; SOCKBUF_LOCK_ASSERT(sb); while (m && m->m_nextpkt) m = m->m_nextpkt; while (m && m->m_next) m = m->m_next; if (m != sb->sb_mbtail) { printf("%s: sb_mb %p sb_mbtail %p last %p\n", __func__, sb->sb_mb, sb->sb_mbtail, m); printf("packet tree:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { printf("\t"); for (n = m; n != NULL; n = n->m_next) printf("%p ", n); printf("\n"); } panic("%s from %s:%u", __func__, file, line); } } #endif /* SOCKBUF_DEBUG */ #define SBLINKRECORD(sb, m0) do { \ SOCKBUF_LOCK_ASSERT(sb); \ if ((sb)->sb_lastrecord != NULL) \ (sb)->sb_lastrecord->m_nextpkt = (m0); \ else \ (sb)->sb_mb = (m0); \ (sb)->sb_lastrecord = (m0); \ } while (/*CONSTCOND*/0) /* * Append mbuf chain m to the last record in the socket buffer sb. The * additional space associated the mbuf chain is recorded in sb. Empty mbufs * are discarded and mbufs are compacted where possible. */ void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags) { struct mbuf *n; SOCKBUF_LOCK_ASSERT(sb); if (m == NULL) return; sbm_clrprotoflags(m, flags); SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } else { /* * XXX Would like to simply use sb_mbtail here, but * XXX I need to verify that I won't miss an EOR that * XXX way. */ if ((n = sb->sb_lastrecord) != NULL) { do { if (n->m_flags & M_EOR) { sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } else { /* * If this is the first record in the socket buffer, * it's also the last record. */ sb->sb_lastrecord = m; } } sbcompress(sb, m, n); SBLASTRECORDCHK(sb); } /* * Append mbuf chain m to the last record in the socket buffer sb. The * additional space associated the mbuf chain is recorded in sb. Empty mbufs * are discarded and mbufs are compacted where possible. */ void sbappend(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK(sb); sbappend_locked(sb, m, flags); SOCKBUF_UNLOCK(sb); } /* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK_ASSERT(sb); KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); SBLASTMBUFCHK(sb); /* Remove all packet headers and mbuf tags to get a pure data chain. */ m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0); sbcompress(sb, m, sb->sb_mbtail); sb->sb_lastrecord = sb->sb_mb; SBLASTRECORDCHK(sb); } /* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK(sb); sbappendstream_locked(sb, m, flags); SOCKBUF_UNLOCK(sb); } #ifdef SOCKBUF_DEBUG void sbcheck(struct sockbuf *sb, const char *file, int line) { struct mbuf *m, *n, *fnrdy; u_long acc, ccc, mbcnt; SOCKBUF_LOCK_ASSERT(sb); acc = ccc = mbcnt = 0; fnrdy = NULL; for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; for (; m; m = m->m_next) { if (m->m_len == 0) { printf("sb %p empty mbuf %p\n", sb, m); goto fail; } if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) { if (m != sb->sb_fnrdy) { printf("sb %p: fnrdy %p != m %p\n", sb, sb->sb_fnrdy, m); goto fail; } fnrdy = m; } if (fnrdy) { if (!(m->m_flags & M_NOTAVAIL)) { printf("sb %p: fnrdy %p, m %p is avail\n", sb, sb->sb_fnrdy, m); goto fail; } } else acc += m->m_len; ccc += m->m_len; mbcnt += MSIZE; if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ mbcnt += m->m_ext.ext_size; } } if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) { printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n", acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt); goto fail; } return; fail: panic("%s from %s:%u", __func__, file, line); } #endif /* * As above, except the mbuf chain begins a new record. */ void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0) { struct mbuf *m; SOCKBUF_LOCK_ASSERT(sb); if (m0 == NULL) return; m_clrprotoflags(m0); /* * Put the first mbuf on the queue. Note this permits zero length * records. */ sballoc(sb, m0); SBLASTRECORDCHK(sb); SBLINKRECORD(sb, m0); sb->sb_mbtail = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } /* always call sbcompress() so it can do SBLASTMBUFCHK() */ sbcompress(sb, m, m0); } /* * As above, except the mbuf chain begins a new record. */ void sbappendrecord(struct sockbuf *sb, struct mbuf *m0) { SOCKBUF_LOCK(sb); sbappendrecord_locked(sb, m0); SOCKBUF_UNLOCK(sb); } /* Helper routine that appends data, control, and address to a sockbuf. */ static int sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last) { struct mbuf *m, *n, *nlast; #if MSIZE <= 256 if (asa->sa_len > MLEN) return (0); #endif m = m_get(M_NOWAIT, MT_SONAME); if (m == NULL) return (0); m->m_len = asa->sa_len; bcopy(asa, mtod(m, caddr_t), asa->sa_len); if (m0) { m_clrprotoflags(m0); m_tag_delete_chain(m0, NULL); /* * Clear some persistent info from pkthdr. * We don't use m_demote(), because some netgraph consumers * expect M_PKTHDR presence. */ m0->m_pkthdr.rcvif = NULL; m0->m_pkthdr.flowid = 0; m0->m_pkthdr.csum_flags = 0; m0->m_pkthdr.fibnum = 0; m0->m_pkthdr.rsstype = 0; } if (ctrl_last) ctrl_last->m_next = m0; /* concatenate data to control */ else control = m0; m->m_next = control; for (n = m; n->m_next != NULL; n = n->m_next) sballoc(sb, n); sballoc(sb, n); nlast = n; SBLINKRECORD(sb, m); sb->sb_mbtail = nlast; SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); return (1); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if no space in sockbuf or insufficient * mbufs. */ int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { struct mbuf *ctrl_last; int space = asa->sa_len; SOCKBUF_LOCK_ASSERT(sb); if (m0 && (m0->m_flags & M_PKTHDR) == 0) panic("sbappendaddr_locked"); if (m0) space += m0->m_pkthdr.len; space += m_length(control, &ctrl_last); if (space > sbspace(sb)) return (0); return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if insufficient mbufs. Does not validate space * on the receiving sockbuf. */ int sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { struct mbuf *ctrl_last; SOCKBUF_LOCK_ASSERT(sb); ctrl_last = (control == NULL) ? NULL : m_last(control); return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if no space in sockbuf or insufficient * mbufs. */ int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { int retval; SOCKBUF_LOCK(sb); retval = sbappendaddr_locked(sb, asa, m0, control); SOCKBUF_UNLOCK(sb); return (retval); } int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) { struct mbuf *m, *n, *mlast; int space; SOCKBUF_LOCK_ASSERT(sb); if (control == NULL) panic("sbappendcontrol_locked"); space = m_length(control, &n) + m_length(m0, NULL); if (space > sbspace(sb)) return (0); m_clrprotoflags(m0); n->m_next = m0; /* concatenate data to control */ SBLASTRECORDCHK(sb); for (m = control; m->m_next; m = m->m_next) sballoc(sb, m); sballoc(sb, m); mlast = m; SBLINKRECORD(sb, control); sb->sb_mbtail = mlast; SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); return (1); } int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) { int retval; SOCKBUF_LOCK(sb); retval = sbappendcontrol_locked(sb, m0, control); SOCKBUF_UNLOCK(sb); return (retval); } /* * Append the data in mbuf chain (m) into the socket buffer sb following mbuf * (n). If (n) is NULL, the buffer is presumed empty. * * When the data is compressed, mbufs in the chain may be handled in one of * three ways: * * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no * record boundary, and no change in data type). * * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into * an mbuf already in the socket buffer. This can occur if an * appropriate mbuf exists, there is room, both mbufs are not marked as * not ready, and no merging of data types will occur. * * (3) The mbuf may be appended to the end of the existing mbuf chain. * * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as * end-of-record. */ void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) { int eor = 0; struct mbuf *o; SOCKBUF_LOCK_ASSERT(sb); while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { if (sb->sb_lastrecord == m) sb->sb_lastrecord = m->m_next; m = m_free(m); continue; } if (n && (n->m_flags & M_EOR) == 0 && M_WRITABLE(n) && ((sb->sb_flags & SB_NOCOALESCE) == 0) && !(m->m_flags & M_NOTREADY) && !(n->m_flags & M_NOTREADY) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ m->m_len <= M_TRAILINGSPACE(n) && n->m_type == m->m_type) { bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, (unsigned)m->m_len); n->m_len += m->m_len; sb->sb_ccc += m->m_len; if (sb->sb_fnrdy == NULL) sb->sb_acc += m->m_len; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) /* XXX: Probably don't need.*/ sb->sb_ctl += m->m_len; m = m_free(m); continue; } if (n) n->m_next = m; else sb->sb_mb = m; sb->sb_mbtail = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; m = m->m_next; n->m_next = 0; } if (eor) { KASSERT(n != NULL, ("sbcompress: eor && n == NULL")); n->m_flags |= eor; } SBLASTMBUFCHK(sb); } /* * Free all mbufs in a sockbuf. Check that all resources are reclaimed. */ static void sbflush_internal(struct sockbuf *sb) { while (sb->sb_mbcnt) { /* * Don't call sbcut(sb, 0) if the leading mbuf is non-empty: * we would loop forever. Panic instead. */ if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len)) break; m_freem(sbcut_internal(sb, (int)sb->sb_ccc)); } KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, ("%s: ccc %u mb %p mbcnt %u", __func__, sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); } void sbflush_locked(struct sockbuf *sb) { SOCKBUF_LOCK_ASSERT(sb); sbflush_internal(sb); } void sbflush(struct sockbuf *sb) { SOCKBUF_LOCK(sb); sbflush_locked(sb); SOCKBUF_UNLOCK(sb); } /* * Cut data from (the front of) a sockbuf. */ static struct mbuf * sbcut_internal(struct sockbuf *sb, int len) { struct mbuf *m, *next, *mfree; KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0", __func__, len)); KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u", __func__, len, sb->sb_ccc)); next = (m = sb->sb_mb) ? m->m_nextpkt : 0; mfree = NULL; while (len > 0) { if (m == NULL) { KASSERT(next, ("%s: no next, len %d", __func__, len)); m = next; next = m->m_nextpkt; } if (m->m_len > len) { KASSERT(!(m->m_flags & M_NOTAVAIL), ("%s: m %p M_NOTAVAIL", __func__, m)); m->m_len -= len; m->m_data += len; sb->sb_ccc -= len; sb->sb_acc -= len; if (sb->sb_sndptroff != 0) sb->sb_sndptroff -= len; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl -= len; break; } len -= m->m_len; sbfree(sb, m); /* * Do not put M_NOTREADY buffers to the free list, they * are referenced from outside. */ if (m->m_flags & M_NOTREADY) m = m->m_next; else { struct mbuf *n; n = m->m_next; m->m_next = mfree; mfree = m; m = n; } } /* * Free any zero-length mbufs from the buffer. * For SOCK_DGRAM sockets such mbufs represent empty records. * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer, * when sosend_generic() needs to send only control data. */ while (m && m->m_len == 0) { struct mbuf *n; sbfree(sb, m); n = m->m_next; m->m_next = mfree; mfree = m; m = n; } if (m) { sb->sb_mb = m; m->m_nextpkt = next; } else sb->sb_mb = next; /* * First part is an inline SB_EMPTY_FIXUP(). Second part makes sure * sb_lastrecord is up-to-date if we dropped part of the last record. */ m = sb->sb_mb; if (m == NULL) { sb->sb_mbtail = NULL; sb->sb_lastrecord = NULL; } else if (m->m_nextpkt == NULL) { sb->sb_lastrecord = m; } return (mfree); } /* * Drop data from (the front of) a sockbuf. */ void sbdrop_locked(struct sockbuf *sb, int len) { SOCKBUF_LOCK_ASSERT(sb); m_freem(sbcut_internal(sb, len)); } /* * Drop data from (the front of) a sockbuf, * and return it to caller. */ struct mbuf * sbcut_locked(struct sockbuf *sb, int len) { SOCKBUF_LOCK_ASSERT(sb); return (sbcut_internal(sb, len)); } void sbdrop(struct sockbuf *sb, int len) { struct mbuf *mfree; SOCKBUF_LOCK(sb); mfree = sbcut_internal(sb, len); SOCKBUF_UNLOCK(sb); m_freem(mfree); } /* * Maintain a pointer and offset pair into the socket buffer mbuf chain to * avoid traversal of the entire socket buffer for larger offsets. */ struct mbuf * sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff) { struct mbuf *m, *ret; KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); KASSERT(off + len <= sb->sb_acc, ("%s: beyond sb", __func__)); KASSERT(sb->sb_sndptroff <= sb->sb_acc, ("%s: sndptroff broken", __func__)); /* * Is off below stored offset? Happens on retransmits. * Just return, we can't help here. */ if (sb->sb_sndptroff > off) { *moff = off; return (sb->sb_mb); } /* Return closest mbuf in chain for current offset. */ *moff = off - sb->sb_sndptroff; m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb; if (*moff == m->m_len) { *moff = 0; sb->sb_sndptroff += m->m_len; m = ret = m->m_next; KASSERT(ret->m_len > 0, ("mbuf %p in sockbuf %p chain has no valid data", ret, sb)); } /* Advance by len to be as close as possible for the next transmit. */ for (off = off - sb->sb_sndptroff + len - 1; off > 0 && m != NULL && off >= m->m_len; m = m->m_next) { sb->sb_sndptroff += m->m_len; off -= m->m_len; } if (off > 0 && m == NULL) panic("%s: sockbuf %p and mbuf %p clashing", __func__, sb, ret); sb->sb_sndptr = m; return (ret); } /* * Return the first mbuf and the mbuf data offset for the provided * send offset without changing the "sb_sndptroff" field. */ struct mbuf * sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff) { struct mbuf *m; KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); /* * If the "off" is below the stored offset, which happens on * retransmits, just use "sb_mb": */ if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { m = sb->sb_mb; } else { m = sb->sb_sndptr; off -= sb->sb_sndptroff; } while (off > 0 && m != NULL) { if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } *moff = off; return (m); } /* * Drop a record off the front of a sockbuf and move the next record to the * front. */ void sbdroprecord_locked(struct sockbuf *sb) { struct mbuf *m; SOCKBUF_LOCK_ASSERT(sb); m = sb->sb_mb; if (m) { sb->sb_mb = m->m_nextpkt; do { sbfree(sb, m); m = m_free(m); } while (m); } SB_EMPTY_FIXUP(sb); } /* * Drop a record off the front of a sockbuf and move the next record to the * front. */ void sbdroprecord(struct sockbuf *sb) { SOCKBUF_LOCK(sb); sbdroprecord_locked(sb); SOCKBUF_UNLOCK(sb); } /* * Create a "control" mbuf containing the specified data with the specified * type for presentation on a socket buffer. */ struct mbuf * sbcreatecontrol(caddr_t p, int size, int type, int level) { struct cmsghdr *cp; struct mbuf *m; if (CMSG_SPACE((u_int)size) > MCLBYTES) return ((struct mbuf *) NULL); if (CMSG_SPACE((u_int)size) > MLEN) m = m_getcl(M_NOWAIT, MT_CONTROL, 0); else m = m_get(M_NOWAIT, MT_CONTROL); if (m == NULL) return ((struct mbuf *) NULL); cp = mtod(m, struct cmsghdr *); m->m_len = 0; KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m), ("sbcreatecontrol: short mbuf")); /* * Don't leave the padding between the msg header and the * cmsg data and the padding after the cmsg data un-initialized. */ bzero(cp, CMSG_SPACE((u_int)size)); if (p != NULL) (void)memcpy(CMSG_DATA(cp), p, size); m->m_len = CMSG_SPACE(size); cp->cmsg_len = CMSG_LEN(size); cp->cmsg_level = level; cp->cmsg_type = type; return (m); } /* * This does the same for socket buffers that sotoxsocket does for sockets: * generate an user-format data structure describing the socket buffer. Note * that the xsockbuf structure, since it is always embedded in a socket, does * not include a self pointer nor a length. We make this entry point public * in case some other mechanism needs it. */ void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_ccc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; xsb->sb_mcnt = sb->sb_mcnt; xsb->sb_ccnt = sb->sb_ccnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ static int dummy; SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW, &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size"); SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, &sb_efficiency, 0, "Socket buffer size waste factor");