Index: projects/clang370-import/contrib/elftoolchain/addr2line/addr2line.c
===================================================================
--- projects/clang370-import/contrib/elftoolchain/addr2line/addr2line.c	(revision 288125)
+++ projects/clang370-import/contrib/elftoolchain/addr2line/addr2line.c	(revision 288126)
@@ -1,453 +1,459 @@
/*-
 * Copyright (c) 2009 Kai Wang
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/types.h>
#include <dwarf.h>
#include <err.h>
#include <fcntl.h>
#include <gelf.h>
#include <getopt.h>
#include <libdwarf.h>
#include <libelf.h>
#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "_elftc.h"

ELFTC_VCSID("$Id: addr2line.c 3197 2015-05-12 21:01:31Z emaste $");

static struct option longopts[] = {
	{"target" , required_argument, NULL, 'b'},
	{"demangle", no_argument, NULL, 'C'},
	{"exe", required_argument, NULL, 'e'},
	{"functions", no_argument, NULL, 'f'},
	{"section", required_argument, NULL, 'j'},
	{"basename", no_argument, NULL, 's'},
	{"help", no_argument, NULL, 'H'},
	{"version", no_argument, NULL, 'V'},
	{NULL, 0, NULL, 0}
};
static int demangle, func, base;
static char unknown[] = { '?', '?', '\0' };
static Dwarf_Addr section_base;

#define	USAGE_MESSAGE	"\
Usage: %s [options] hexaddress...\n\
  Map program addresses to source file names and line numbers.\n\n\
  Options:\n\
  -b TGT  | --target=TGT      (Accepted but ignored).\n\
  -e EXE  | --exe=EXE         Use program \"EXE\" to translate addresses.\n\
  -f      | --functions       Display function names.\n\
  -j NAME | --section=NAME    Values are offsets into section \"NAME\".\n\
  -s      | --basename        Only show the base name for each file name.\n\
  -C      | --demangle        Demangle C++ names.\n\
  -H      | --help            Print a help message.\n\
  -V      | --version         Print a version identifier and exit.\n"

static void
usage(void)
{
	(void) fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME());
	exit(1);
}

static void
version(void)
{
	fprintf(stderr, "%s (%s)\n", ELFTC_GETPROGNAME(), elftc_version());
	exit(0);
}

/*
 * Handle DWARF 4 'offset from' DW_AT_high_pc.  Although we don't
 * fully support DWARF 4, some compilers (like FreeBSD Clang 3.5.1)
 * generate DW_AT_high_pc as an offset from DW_AT_low_pc.
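 * In that case the attribute's value is an offset rather than an
 * address, and it becomes the absolute upper bound of the entity
 * only after DW_AT_low_pc has been added to it, which is what this
 * function does.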
* * "If the value of the DW_AT_high_pc is of class address, it is the * relocated address of the first location past the last instruction * associated with the entity; if it is of class constant, the value * is an unsigned integer offset which when added to the low PC gives * the address of the first location past the last instruction * associated with the entity." * * DWARF4 spec, section 2.17.2. */ static int handle_high_pc(Dwarf_Die die, Dwarf_Unsigned lopc, Dwarf_Unsigned *hipc) { Dwarf_Error de; Dwarf_Half form; Dwarf_Attribute at; int ret; ret = dwarf_attr(die, DW_AT_high_pc, &at, &de); if (ret == DW_DLV_ERROR) { warnx("dwarf_attr failed: %s", dwarf_errmsg(de)); return (ret); } ret = dwarf_whatform(at, &form, &de); if (ret == DW_DLV_ERROR) { warnx("dwarf_whatform failed: %s", dwarf_errmsg(de)); return (ret); } if (dwarf_get_form_class(2, 0, 0, form) == DW_FORM_CLASS_CONSTANT) *hipc += lopc; return (DW_DLV_OK); } static void search_func(Dwarf_Debug dbg, Dwarf_Die die, Dwarf_Addr addr, const char **rlt_func) { Dwarf_Die ret_die, spec_die; Dwarf_Error de; Dwarf_Half tag; Dwarf_Unsigned lopc, hipc; Dwarf_Off ref; Dwarf_Attribute sub_at, spec_at; char *func0; int ret; if (*rlt_func != NULL) return; if (dwarf_tag(die, &tag, &de)) { warnx("dwarf_tag: %s", dwarf_errmsg(de)); goto cont_search; } if (tag == DW_TAG_subprogram) { if (dwarf_attrval_unsigned(die, DW_AT_low_pc, &lopc, &de) || dwarf_attrval_unsigned(die, DW_AT_high_pc, &hipc, &de)) goto cont_search; if (handle_high_pc(die, lopc, &hipc) != DW_DLV_OK) goto cont_search; if (addr < lopc || addr >= hipc) goto cont_search; /* Found it! */ *rlt_func = unknown; ret = dwarf_attr(die, DW_AT_name, &sub_at, &de); if (ret == DW_DLV_ERROR) return; if (ret == DW_DLV_OK) { if (dwarf_formstring(sub_at, &func0, &de)) *rlt_func = unknown; else *rlt_func = func0; return; } /* * If DW_AT_name is not present, but DW_AT_specification is * present, then probably the actual name is in the DIE * referenced by DW_AT_specification. */ if (dwarf_attr(die, DW_AT_specification, &spec_at, &de)) return; if (dwarf_global_formref(spec_at, &ref, &de)) return; if (dwarf_offdie(dbg, ref, &spec_die, &de)) return; if (dwarf_attrval_string(spec_die, DW_AT_name, rlt_func, &de)) *rlt_func = unknown; return; } cont_search: /* Search children. */ ret = dwarf_child(die, &ret_die, &de); if (ret == DW_DLV_ERROR) errx(EXIT_FAILURE, "dwarf_child: %s", dwarf_errmsg(de)); else if (ret == DW_DLV_OK) search_func(dbg, ret_die, addr, rlt_func); /* Search sibling. */ ret = dwarf_siblingof(dbg, die, &ret_die, &de); if (ret == DW_DLV_ERROR) errx(EXIT_FAILURE, "dwarf_siblingof: %s", dwarf_errmsg(de)); else if (ret == DW_DLV_OK) search_func(dbg, ret_die, addr, rlt_func); } static void translate(Dwarf_Debug dbg, const char* addrstr) { Dwarf_Die die; Dwarf_Line *lbuf; Dwarf_Error de; Dwarf_Half tag; Dwarf_Unsigned lopc, hipc, addr, lineno, plineno; Dwarf_Signed lcount; Dwarf_Addr lineaddr, plineaddr; const char *funcname; char *file, *file0, *pfile; char demangled[1024]; int i, ret; addr = strtoull(addrstr, NULL, 16); addr += section_base; lineno = 0; file = unknown; while ((ret = dwarf_next_cu_header(dbg, NULL, NULL, NULL, NULL, NULL, &de)) == DW_DLV_OK) { die = NULL; while (dwarf_siblingof(dbg, die, &die, &de) == DW_DLV_OK) { if (dwarf_tag(die, &tag, &de) != DW_DLV_OK) { warnx("dwarf_tag failed: %s", dwarf_errmsg(de)); goto out; } /* XXX: What about DW_TAG_partial_unit? 
*/ if (tag == DW_TAG_compile_unit) break; } if (die == NULL) { warnx("could not find DW_TAG_compile_unit die"); goto out; } if (!dwarf_attrval_unsigned(die, DW_AT_low_pc, &lopc, &de) && !dwarf_attrval_unsigned(die, DW_AT_high_pc, &hipc, &de)) { /* * Check if the address falls into the PC range of * this CU. */ if (handle_high_pc(die, lopc, &hipc) != DW_DLV_OK) continue; if (addr < lopc || addr >= hipc) continue; } - if (dwarf_srclines(die, &lbuf, &lcount, &de) != DW_DLV_OK) { + switch (dwarf_srclines(die, &lbuf, &lcount, &de)) { + case DW_DLV_OK: + break; + case DW_DLV_NO_ENTRY: + /* If one CU lacks debug info, just skip it. */ + continue; + default: warnx("dwarf_srclines: %s", dwarf_errmsg(de)); goto out; } plineaddr = ~0ULL; plineno = 0; pfile = unknown; for (i = 0; i < lcount; i++) { if (dwarf_lineaddr(lbuf[i], &lineaddr, &de)) { warnx("dwarf_lineaddr: %s", dwarf_errmsg(de)); goto out; } if (dwarf_lineno(lbuf[i], &lineno, &de)) { warnx("dwarf_lineno: %s", dwarf_errmsg(de)); goto out; } if (dwarf_linesrc(lbuf[i], &file0, &de)) { warnx("dwarf_linesrc: %s", dwarf_errmsg(de)); } else file = file0; if (addr == lineaddr) goto out; else if (addr < lineaddr && addr > plineaddr) { lineno = plineno; file = pfile; goto out; } plineaddr = lineaddr; plineno = lineno; pfile = file; } } out: funcname = NULL; if (ret == DW_DLV_OK && func) search_func(dbg, die, addr, &funcname); if (func) { if (funcname == NULL) funcname = unknown; if (demangle && !elftc_demangle(funcname, demangled, sizeof(demangled), 0)) printf("%s\n", demangled); else printf("%s\n", funcname); } (void) printf("%s:%ju\n", base ? basename(file) : file, lineno); /* * Reset internal CU pointer, so we will start from the first CU * next round. */ while (ret != DW_DLV_NO_ENTRY) { if (ret == DW_DLV_ERROR) errx(EXIT_FAILURE, "dwarf_next_cu_header: %s", dwarf_errmsg(de)); ret = dwarf_next_cu_header(dbg, NULL, NULL, NULL, NULL, NULL, &de); } } static void find_section_base(const char *exe, Elf *e, const char *section) { Dwarf_Addr off; Elf_Scn *scn; GElf_Ehdr eh; GElf_Shdr sh; size_t shstrndx; int elferr; const char *name; if (gelf_getehdr(e, &eh) != &eh) { warnx("gelf_getehdr failed: %s", elf_errmsg(-1)); return; } if (!elf_getshstrndx(e, &shstrndx)) { warnx("elf_getshstrndx failed: %s", elf_errmsg(-1)); return; } (void) elf_errno(); off = 0; scn = NULL; while ((scn = elf_nextscn(e, scn)) != NULL) { if (gelf_getshdr(scn, &sh) == NULL) { warnx("gelf_getshdr failed: %s", elf_errmsg(-1)); continue; } if ((name = elf_strptr(e, shstrndx, sh.sh_name)) == NULL) goto next; if (!strcmp(section, name)) { if (eh.e_type == ET_EXEC || eh.e_type == ET_DYN) { /* * For executables, section base is the virtual * address of the specified section. */ section_base = sh.sh_addr; } else if (eh.e_type == ET_REL) { /* * For relocatables, section base is the * relative offset of the specified section * to the start of the first section. 
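				 * (These offsets are accumulated into 'off'
				 * below, one sh_size at a time, as the
				 * section headers are walked.)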
*/ section_base = off; } else warnx("unknown e_type %u", eh.e_type); return; } next: off += sh.sh_size; } elferr = elf_errno(); if (elferr != 0) warnx("elf_nextscn failed: %s", elf_errmsg(elferr)); errx(EXIT_FAILURE, "%s: cannot find section %s", exe, section); } int main(int argc, char **argv) { Elf *e; Dwarf_Debug dbg; Dwarf_Error de; const char *exe, *section; char line[1024]; int fd, i, opt; exe = NULL; section = NULL; while ((opt = getopt_long(argc, argv, "b:Ce:fj:sHV", longopts, NULL)) != -1) { switch (opt) { case 'b': /* ignored */ break; case 'C': demangle = 1; break; case 'e': exe = optarg; break; case 'f': func = 1; break; case 'j': section = optarg; break; case 's': base = 1; break; case 'H': usage(); case 'V': version(); default: usage(); } } argv += optind; argc -= optind; if (exe == NULL) exe = "a.out"; if ((fd = open(exe, O_RDONLY)) < 0) err(EXIT_FAILURE, "%s", exe); if (dwarf_init(fd, DW_DLC_READ, NULL, NULL, &dbg, &de)) errx(EXIT_FAILURE, "dwarf_init: %s", dwarf_errmsg(de)); if (dwarf_get_elf(dbg, &e, &de) != DW_DLV_OK) errx(EXIT_FAILURE, "dwarf_get_elf: %s", dwarf_errmsg(de)); if (section) find_section_base(exe, e, section); else section_base = 0; if (argc > 0) for (i = 0; i < argc; i++) translate(dbg, argv[i]); else while (fgets(line, sizeof(line), stdin) != NULL) { translate(dbg, line); fflush(stdout); } dwarf_finish(dbg, &de); (void) elf_end(e); exit(0); } Index: projects/clang370-import/contrib/elftoolchain =================================================================== --- projects/clang370-import/contrib/elftoolchain (revision 288125) +++ projects/clang370-import/contrib/elftoolchain (revision 288126) Property changes on: projects/clang370-import/contrib/elftoolchain ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/elftoolchain:r286422-288125 Index: projects/clang370-import/lib/libc/resolv/res_debug.c =================================================================== --- projects/clang370-import/lib/libc/resolv/res_debug.c (revision 288125) +++ projects/clang370-import/lib/libc/resolv/res_debug.c (revision 288126) @@ -1,1248 +1,1240 @@ /* * Portions Copyright (C) 2004, 2005, 2008, 2009 Internet Systems Consortium, Inc. ("ISC") * Portions Copyright (C) 1996-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THIS SOFTWARE. */ /* * Copyright (c) 1985 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Portions Copyright (c) 1993 by Digital Equipment Corporation. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies, and that * the name of Digital Equipment Corporation not be used in advertising or * publicity pertaining to distribution of the document or software without * specific, written prior permission. * * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS * SOFTWARE. */ /* * Portions Copyright (c) 1995 by International Business Machines, Inc. * * International Business Machines, Inc. (hereinafter called IBM) grants * permission under its copyrights to use, copy, modify, and distribute this * Software with or without fee, provided that the above copyright notice and * all paragraphs of this notice appear in all copies, and that the name of IBM * not be used in connection with the marketing of any product incorporating * the Software or modifications thereof, without specific, written prior * permission. * * To the extent it has a right to do so, IBM grants an immunity from suit * under its patents, if any, for the use, sale or manufacture of products to * the extent that such products are used for performing Domain Name System * dynamic updates in TCP/IP networks by means of the Software. No immunity is * granted for any product per se or for any other function of any product. * * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL, * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES. 
 */

#if defined(LIBC_SCCS) && !defined(lint)
static const char sccsid[] = "@(#)res_debug.c	8.1 (Berkeley) 6/4/93";
static const char rcsid[] = "$Id: res_debug.c,v 1.19 2009/02/26 11:20:20 tbox Exp $";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "port_before.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/socket.h>

#include <netinet/in.h>
#include <arpa/inet.h>
#include <arpa/nameser.h>

#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <netdb.h>
#include <resolv.h>
#include <resolv_mt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "port_after.h"

#ifdef SPRINTF_CHAR
# define SPRINTF(x) strlen(sprintf/**/x)
#else
# define SPRINTF(x) sprintf x
#endif

extern const char *_res_opcodes[];
extern const char *_res_sectioncodes[];

/*%
 * Print the current options.
 */
void
fp_resstat(const res_state statp, FILE *file) {
	u_long mask;

	fprintf(file, ";; res options:");
	for (mask = 1; mask != 0U; mask <<= 1)
		if (statp->options & mask)
			fprintf(file, " %s", p_option(mask));
	putc('\n', file);
}

static void
do_section(const res_state statp,
	   ns_msg *handle, ns_sect section,
	   int pflag, FILE *file)
{
	int n, sflag, rrnum;
	static int buflen = 2048;
	char *buf;
	ns_opcode opcode;
	ns_rr rr;

	/*
	 * Print answer records.
	 */
	sflag = (statp->pfcode & pflag);
	if (statp->pfcode && !sflag)
		return;
	buf = malloc(buflen);
	if (buf == NULL) {
		fprintf(file, ";; memory allocation failure\n");
		return;
	}

	opcode = (ns_opcode) ns_msg_getflag(*handle, ns_f_opcode);
	rrnum = 0;
	for (;;) {
		if (ns_parserr(handle, section, rrnum, &rr)) {
			if (errno != ENODEV)
				fprintf(file, ";; ns_parserr: %s\n",
					strerror(errno));
			else if (rrnum > 0 && sflag != 0 &&
				 (statp->pfcode & RES_PRF_HEAD1))
				putc('\n', file);
			goto cleanup;
		}
		if (rrnum == 0 && sflag != 0 &&
		    (statp->pfcode & RES_PRF_HEAD1))
			fprintf(file, ";; %s SECTION:\n",
				p_section(section, opcode));
		if (section == ns_s_qd)
			fprintf(file, ";;\t%s, type = %s, class = %s\n",
				ns_rr_name(rr),
				p_type(ns_rr_type(rr)),
				p_class(ns_rr_class(rr)));
		else if (section == ns_s_ar && ns_rr_type(rr) == ns_t_opt) {
			u_int16_t optcode, optlen, rdatalen = ns_rr_rdlen(rr);
			u_int32_t ttl = ns_rr_ttl(rr);

			fprintf(file,
				"; EDNS: version: %u, udp=%u, flags=%04x\n",
				(ttl>>16)&0xff, ns_rr_class(rr), ttl&0xffff);

			while (rdatalen >= 4) {
				const u_char *cp = ns_rr_rdata(rr);
				int i;

				GETSHORT(optcode, cp);
				GETSHORT(optlen, cp);

				if (optcode == NS_OPT_NSID) {
					if (optlen == 0) {
						fputs("; NSID\n", file);
					} else {
						fputs("; NSID: ", file);
						for (i = 0; i < optlen; i++)
							fprintf(file, "%02x ",
								cp[i]);
						fputs(" (",file);
						for (i = 0; i < optlen; i++)
							fprintf(file, "%c",
								isprint(cp[i])?
								cp[i] : '.');
						fputs(")\n", file);
					}
				} else {
					if (optlen == 0) {
						fprintf(file, "; OPT=%u\n",
							optcode);
					} else {
						fprintf(file, "; OPT=%u: ",
							optcode);
						for (i = 0; i < optlen; i++)
							fprintf(file, "%02x ",
								cp[i]);
						fputs(" (",file);
						for (i = 0; i < optlen; i++)
							fprintf(file, "%c",
								isprint(cp[i]) ?
								cp[i] : '.');
						fputs(")\n", file);
					}
				}
				rdatalen -= 4 + optlen;
			}
		} else {
			n = ns_sprintrr(handle, &rr, NULL, NULL,
					buf, buflen);
			if (n < 0) {
				if (errno == ENOSPC) {
					free(buf);
					buf = NULL;
					if (buflen < 131072)
						buf = malloc(buflen += 1024);
					if (buf == NULL) {
						fprintf(file,
					        ";; memory allocation failure\n");
						return;
					}
					continue;
				}
				fprintf(file, ";; ns_sprintrr: %s\n",
					strerror(errno));
				goto cleanup;
			}
			fputs(buf, file);
			fputc('\n', file);
		}
		rrnum++;
	}
cleanup:
	if (buf != NULL)
		free(buf);
}

/*%
 * Print the contents of a query.
 * This is intended to be primarily a debugging routine.
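 * It prints the message header (opcode, rcode, id and flag bits),
 * the per-section record counts, and then each section in turn.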
*/ void res_pquery(const res_state statp, const u_char *msg, int len, FILE *file) { ns_msg handle; int qdcount, ancount, nscount, arcount; u_int opcode, rcode, id; if (ns_initparse(msg, len, &handle) < 0) { fprintf(file, ";; ns_initparse: %s\n", strerror(errno)); return; } opcode = ns_msg_getflag(handle, ns_f_opcode); rcode = ns_msg_getflag(handle, ns_f_rcode); id = ns_msg_id(handle); qdcount = ns_msg_count(handle, ns_s_qd); ancount = ns_msg_count(handle, ns_s_an); nscount = ns_msg_count(handle, ns_s_ns); arcount = ns_msg_count(handle, ns_s_ar); /* * Print header fields. */ if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEADX) || rcode) fprintf(file, ";; ->>HEADER<<- opcode: %s, status: %s, id: %d\n", _res_opcodes[opcode], p_rcode(rcode), id); if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEADX)) putc(';', file); if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEAD2)) { fprintf(file, "; flags:"); if (ns_msg_getflag(handle, ns_f_qr)) fprintf(file, " qr"); if (ns_msg_getflag(handle, ns_f_aa)) fprintf(file, " aa"); if (ns_msg_getflag(handle, ns_f_tc)) fprintf(file, " tc"); if (ns_msg_getflag(handle, ns_f_rd)) fprintf(file, " rd"); if (ns_msg_getflag(handle, ns_f_ra)) fprintf(file, " ra"); if (ns_msg_getflag(handle, ns_f_z)) fprintf(file, " ??"); if (ns_msg_getflag(handle, ns_f_ad)) fprintf(file, " ad"); if (ns_msg_getflag(handle, ns_f_cd)) fprintf(file, " cd"); } if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEAD1)) { fprintf(file, "; %s: %d", p_section(ns_s_qd, opcode), qdcount); fprintf(file, ", %s: %d", p_section(ns_s_an, opcode), ancount); fprintf(file, ", %s: %d", p_section(ns_s_ns, opcode), nscount); fprintf(file, ", %s: %d", p_section(ns_s_ar, opcode), arcount); } if ((!statp->pfcode) || (statp->pfcode & (RES_PRF_HEADX | RES_PRF_HEAD2 | RES_PRF_HEAD1))) { putc('\n',file); } /* * Print the various sections. */ do_section(statp, &handle, ns_s_qd, RES_PRF_QUES, file); do_section(statp, &handle, ns_s_an, RES_PRF_ANS, file); do_section(statp, &handle, ns_s_ns, RES_PRF_AUTH, file); do_section(statp, &handle, ns_s_ar, RES_PRF_ADD, file); if (qdcount == 0 && ancount == 0 && nscount == 0 && arcount == 0) putc('\n', file); } const u_char * p_cdnname(const u_char *cp, const u_char *msg, int len, FILE *file) { char name[MAXDNAME]; int n; if ((n = dn_expand(msg, msg + len, cp, name, sizeof name)) < 0) return (NULL); if (name[0] == '\0') putc('.', file); else fputs(name, file); return (cp + n); } const u_char * p_cdname(const u_char *cp, const u_char *msg, FILE *file) { return (p_cdnname(cp, msg, PACKETSZ, file)); } /*% * Return a fully-qualified domain name from a compressed name (with length supplied). */ const u_char * -p_fqnname(cp, msg, msglen, name, namelen) - const u_char *cp, *msg; - int msglen; - char *name; - int namelen; +p_fqnname(const u_char *cp, const u_char *msg, int msglen, char *name, + int namelen) { int n, newlen; if ((n = dn_expand(msg, cp + msglen, cp, name, namelen)) < 0) return (NULL); newlen = strlen(name); if (newlen == 0 || name[newlen - 1] != '.') { if (newlen + 1 >= namelen) /*%< Lack space for final dot */ return (NULL); else strcpy(name + newlen, "."); } return (cp + n); } /* XXX: the rest of these functions need to become length-limited, too. */ const u_char * p_fqname(const u_char *cp, const u_char *msg, FILE *file) { char name[MAXDNAME]; const u_char *n; n = p_fqnname(cp, msg, MAXCDNAME, name, sizeof name); if (n == NULL) return (NULL); fputs(name, file); return (n); } /*% * Names of RR classes and qclasses. 
Classes and qclasses are the same, except
 * that C_ANY is a qclass but not a class.  (You can ask for records of class
 * C_ANY, but you can't have any records of that class in the database.)
 */
const struct res_sym __p_class_syms[] = {
	{C_IN,		"IN",		(char *)0},
	{C_CHAOS,	"CH",		(char *)0},
	{C_CHAOS,	"CHAOS",	(char *)0},
	{C_HS,		"HS",		(char *)0},
	{C_HS,		"HESIOD",	(char *)0},
	{C_ANY,		"ANY",		(char *)0},
	{C_NONE,	"NONE",		(char *)0},
	{C_IN,		(char *)0,	(char *)0}
};

/*%
 * Names of message sections.
 */
static const struct res_sym __p_default_section_syms[] = {
	{ns_s_qd,	"QUERY",	(char *)0},
	{ns_s_an,	"ANSWER",	(char *)0},
	{ns_s_ns,	"AUTHORITY",	(char *)0},
	{ns_s_ar,	"ADDITIONAL",	(char *)0},
	{0,		(char *)0,	(char *)0}
};

static const struct res_sym __p_update_section_syms[] = {
	{S_ZONE,	"ZONE",		(char *)0},
	{S_PREREQ,	"PREREQUISITE",	(char *)0},
	{S_UPDATE,	"UPDATE",	(char *)0},
	{S_ADDT,	"ADDITIONAL",	(char *)0},
	{0,		(char *)0,	(char *)0}
};

const struct res_sym __p_key_syms[] = {
	{NS_ALG_MD5RSA,		"RSA",		"RSA KEY with MD5 hash"},
	{NS_ALG_DH,		"DH",		"Diffie Hellman"},
	{NS_ALG_DSA,		"DSA",		"Digital Signature Algorithm"},
	{NS_ALG_EXPIRE_ONLY,	"EXPIREONLY",	"No algorithm"},
	{NS_ALG_PRIVATE_OID,	"PRIVATE",	"Algorithm obtained from OID"},
	{0,			NULL,		NULL}
};

const struct res_sym __p_cert_syms[] = {
	{cert_t_pkix,	"PKIX",		"PKIX (X.509v3) Certificate"},
	{cert_t_spki,	"SPKI",		"SPKI certificate"},
	{cert_t_pgp,	"PGP",		"PGP certificate"},
	{cert_t_url,	"URL",		"URL Private"},
	{cert_t_oid,	"OID",		"OID Private"},
	{0,		NULL,		NULL}
};

/*%
 * Names of RR types and qtypes.  Types and qtypes are the same, except
 * that T_ANY is a qtype but not a type.  (You can ask for records of type
 * T_ANY, but you can't have any records of that type in the database.)
 */
const struct res_sym __p_type_syms[] = {
	{ns_t_a,	"A",		"address"},
	{ns_t_ns,	"NS",		"name server"},
	{ns_t_md,	"MD",		"mail destination (deprecated)"},
	{ns_t_mf,	"MF",		"mail forwarder (deprecated)"},
	{ns_t_cname,	"CNAME",	"canonical name"},
	{ns_t_soa,	"SOA",		"start of authority"},
	{ns_t_mb,	"MB",		"mailbox"},
	{ns_t_mg,	"MG",		"mail group member"},
	{ns_t_mr,	"MR",		"mail rename"},
	{ns_t_null,	"NULL",		"null"},
	{ns_t_wks,	"WKS",		"well-known service (deprecated)"},
	{ns_t_ptr,	"PTR",		"domain name pointer"},
	{ns_t_hinfo,	"HINFO",	"host information"},
	{ns_t_minfo,	"MINFO",	"mailbox information"},
	{ns_t_mx,	"MX",		"mail exchanger"},
	{ns_t_txt,	"TXT",		"text"},
	{ns_t_rp,	"RP",		"responsible person"},
	{ns_t_afsdb,	"AFSDB",	"DCE or AFS server"},
	{ns_t_x25,	"X25",		"X25 address"},
	{ns_t_isdn,	"ISDN",		"ISDN address"},
	{ns_t_rt,	"RT",		"router"},
	{ns_t_nsap,	"NSAP",		"nsap address"},
	{ns_t_nsap_ptr,	"NSAP_PTR",	"domain name pointer"},
	{ns_t_sig,	"SIG",		"signature"},
	{ns_t_key,	"KEY",		"key"},
	{ns_t_px,	"PX",		"mapping information"},
	{ns_t_gpos,	"GPOS",		"geographical position (withdrawn)"},
	{ns_t_aaaa,	"AAAA",		"IPv6 address"},
	{ns_t_loc,	"LOC",		"location"},
	{ns_t_nxt,	"NXT",		"next valid name (unimplemented)"},
	{ns_t_eid,	"EID",		"endpoint identifier (unimplemented)"},
	{ns_t_nimloc,	"NIMLOC",	"NIMROD locator (unimplemented)"},
	{ns_t_srv,	"SRV",		"server selection"},
	{ns_t_atma,	"ATMA",		"ATM address (unimplemented)"},
	{ns_t_naptr,	"NAPTR",	"naptr"},
	{ns_t_kx,	"KX",		"key exchange"},
	{ns_t_cert,	"CERT",		"certificate"},
	{ns_t_a6,	"A6",		"IPv6 address (experimental)"},
	{ns_t_dname,	"DNAME",	"non-terminal redirection"},
	{ns_t_opt,	"OPT",		"opt"},
	{ns_t_apl,	"apl",		"apl"},
	{ns_t_ds,	"DS",		"delegation signer"},
	{ns_t_sshfp,	"SSHFP",	"SSH fingerprint"},
	{ns_t_ipseckey,	"IPSECKEY",	"IPSEC key"},
	{ns_t_rrsig,	"RRSIG",
"rrsig"}, {ns_t_nsec, "NSEC", "nsec"}, {ns_t_dnskey, "DNSKEY", "DNS key"}, {ns_t_dhcid, "DHCID", "dynamic host configuration identifier"}, {ns_t_nsec3, "NSEC3", "nsec3"}, {ns_t_nsec3param, "NSEC3PARAM", "NSEC3 parameters"}, {ns_t_hip, "HIP", "host identity protocol"}, {ns_t_spf, "SPF", "sender policy framework"}, {ns_t_tkey, "TKEY", "tkey"}, {ns_t_tsig, "TSIG", "transaction signature"}, {ns_t_ixfr, "IXFR", "incremental zone transfer"}, {ns_t_axfr, "AXFR", "zone transfer"}, {ns_t_zxfr, "ZXFR", "compressed zone transfer"}, {ns_t_mailb, "MAILB", "mailbox-related data (deprecated)"}, {ns_t_maila, "MAILA", "mail agent (deprecated)"}, {ns_t_naptr, "NAPTR", "URN Naming Authority"}, {ns_t_kx, "KX", "Key Exchange"}, {ns_t_cert, "CERT", "Certificate"}, {ns_t_a6, "A6", "IPv6 Address"}, {ns_t_dname, "DNAME", "dname"}, {ns_t_sink, "SINK", "Kitchen Sink (experimental)"}, {ns_t_opt, "OPT", "EDNS Options"}, {ns_t_any, "ANY", "\"any\""}, {ns_t_dlv, "DLV", "DNSSEC look-aside validation"}, {0, NULL, NULL} }; /*% * Names of DNS rcodes. */ const struct res_sym __p_rcode_syms[] = { {ns_r_noerror, "NOERROR", "no error"}, {ns_r_formerr, "FORMERR", "format error"}, {ns_r_servfail, "SERVFAIL", "server failed"}, {ns_r_nxdomain, "NXDOMAIN", "no such domain name"}, {ns_r_notimpl, "NOTIMP", "not implemented"}, {ns_r_refused, "REFUSED", "refused"}, {ns_r_yxdomain, "YXDOMAIN", "domain name exists"}, {ns_r_yxrrset, "YXRRSET", "rrset exists"}, {ns_r_nxrrset, "NXRRSET", "rrset doesn't exist"}, {ns_r_notauth, "NOTAUTH", "not authoritative"}, {ns_r_notzone, "NOTZONE", "Not in zone"}, {ns_r_max, "", ""}, {ns_r_badsig, "BADSIG", "bad signature"}, {ns_r_badkey, "BADKEY", "bad key"}, {ns_r_badtime, "BADTIME", "bad time"}, {0, NULL, NULL} }; int sym_ston(const struct res_sym *syms, const char *name, int *success) { for ((void)NULL; syms->name != 0; syms++) { if (strcasecmp (name, syms->name) == 0) { if (success) *success = 1; return (syms->number); } } if (success) *success = 0; return (syms->number); /*%< The default value. */ } const char * sym_ntos(const struct res_sym *syms, int number, int *success) { char *unname = sym_ntos_unname; for ((void)NULL; syms->name != 0; syms++) { if (number == syms->number) { if (success) *success = 1; return (syms->name); } } sprintf(unname, "%d", number); /*%< XXX nonreentrant */ if (success) *success = 0; return (unname); } const char * sym_ntop(const struct res_sym *syms, int number, int *success) { char *unname = sym_ntop_unname; for ((void)NULL; syms->name != 0; syms++) { if (number == syms->number) { if (success) *success = 1; return (syms->humanname); } } sprintf(unname, "%d", number); /*%< XXX nonreentrant */ if (success) *success = 0; return (unname); } /*% * Return a string for the type. */ const char * p_type(int type) { int success; const char *result; static char typebuf[20]; result = sym_ntos(__p_type_syms, type, &success); if (success) return (result); if (type < 0 || type > 0xffff) return ("BADTYPE"); sprintf(typebuf, "TYPE%d", type); return (typebuf); } /*% * Return a string for the type. */ const char * p_section(int section, int opcode) { const struct res_sym *symbols; switch (opcode) { case ns_o_update: symbols = __p_update_section_syms; break; default: symbols = __p_default_section_syms; break; } return (sym_ntos(symbols, section, (int *)0)); } /*% * Return a mnemonic for class. 
 */
const char *
p_class(int class) {
	int success;
	const char *result;
	static char classbuf[20];

	result = sym_ntos(__p_class_syms, class, &success);
	if (success)
		return (result);
	if (class < 0 || class > 0xffff)
		return ("BADCLASS");
	sprintf(classbuf, "CLASS%d", class);
	return (classbuf);
}

/*%
 * Return a mnemonic for an option
 */
const char *
p_option(u_long option) {
	char *nbuf = p_option_nbuf;

	switch (option) {
	case RES_INIT:		return "init";
	case RES_DEBUG:		return "debug";
	case RES_AAONLY:	return "aaonly(unimpl)";
	case RES_USEVC:		return "usevc";
	case RES_PRIMARY:	return "primry(unimpl)";
	case RES_IGNTC:		return "igntc";
	case RES_RECURSE:	return "recurs";
	case RES_DEFNAMES:	return "defnam";
	case RES_STAYOPEN:	return "styopn";
	case RES_DNSRCH:	return "dnsrch";
	case RES_INSECURE1:	return "insecure1";
	case RES_INSECURE2:	return "insecure2";
	case RES_NOALIASES:	return "noaliases";
	case RES_USE_INET6:	return "inet6";
#ifdef RES_USE_EDNS0	/*%< KAME extension */
	case RES_USE_EDNS0:	return "edns0";
	case RES_NSID:		return "nsid";
#endif
#ifdef RES_USE_DNAME
	case RES_USE_DNAME:	return "dname";
#endif
#ifdef RES_USE_DNSSEC
	case RES_USE_DNSSEC:	return "dnssec";
#endif
#ifdef RES_NOTLDQUERY
	case RES_NOTLDQUERY:	return "no-tld-query";
#endif
#ifdef RES_NO_NIBBLE2
	case RES_NO_NIBBLE2:	return "no-nibble2";
#endif
				/* XXX nonreentrant */
	default:		sprintf(nbuf, "?0x%lx?", (u_long)option);
				return (nbuf);
	}
}

/*%
 * Return a mnemonic for a time to live.
 */
const char *
p_time(u_int32_t value) {
	char *nbuf = p_time_nbuf;

	if (ns_format_ttl(value, nbuf, sizeof nbuf) < 0)
		sprintf(nbuf, "%u", value);
	return (nbuf);
}

/*%
 * Return a string for the rcode.
 */
const char *
p_rcode(int rcode) {
	return (sym_ntos(__p_rcode_syms, rcode, (int *)0));
}

/*%
 * Return a string for a res_sockaddr_union.
 */
const char *
p_sockun(union res_sockaddr_union u, char *buf, size_t size) {
	char ret[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:123.123.123.123"];

	switch (u.sin.sin_family) {
	case AF_INET:
		inet_ntop(AF_INET, &u.sin.sin_addr, ret, sizeof ret);
		break;
#ifdef HAS_INET6_STRUCTS
	case AF_INET6:
		inet_ntop(AF_INET6, &u.sin6.sin6_addr, ret, sizeof ret);
		break;
#endif
	default:
		sprintf(ret, "[af%d]", u.sin.sin_family);
		break;
	}
	if (size > 0U) {
		strncpy(buf, ret, size - 1);
		buf[size - 1] = '\0';
	}
	return (buf);
}

/*%
 * routines to convert between on-the-wire RR format and zone file format.
 * Does not contain conversion to/from decimal degrees; divide or multiply
 * by 60*60*1000 for that.
 */

static unsigned int poweroften[10] = {1, 10, 100, 1000, 10000, 100000,
				      1000000,10000000,100000000,1000000000};

/*% takes an XeY precision/size value, returns a string representation. */
static const char *
-precsize_ntoa(prec)
-	u_int8_t prec;
+precsize_ntoa(u_int8_t prec)
{
	char *retbuf = precsize_ntoa_retbuf;
	unsigned long val;
	int mantissa, exponent;

	mantissa = (int)((prec >> 4) & 0x0f) % 10;
	exponent = (int)((prec >> 0) & 0x0f) % 10;

	val = mantissa * poweroften[exponent];

	(void) sprintf(retbuf, "%lu.%.2lu", val/100, val%100);
	return (retbuf);
}

/*% converts ascii size/precision X * 10**Y(cm) to 0xXY. moves pointer.
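 * E.g. "10000m" is 1e6 cm, which encodes as 0x16: mantissa 1 in the
 * high nibble, exponent 6 in the low nibble.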
*/ static u_int8_t precsize_aton(const char **strptr) { unsigned int mval = 0, cmval = 0; u_int8_t retval = 0; const char *cp; int exponent; int mantissa; cp = *strptr; while (isdigit((unsigned char)*cp)) mval = mval * 10 + (*cp++ - '0'); if (*cp == '.') { /*%< centimeters */ cp++; if (isdigit((unsigned char)*cp)) { cmval = (*cp++ - '0') * 10; if (isdigit((unsigned char)*cp)) { cmval += (*cp++ - '0'); } } } cmval = (mval * 100) + cmval; for (exponent = 0; exponent < 9; exponent++) if (cmval < poweroften[exponent+1]) break; mantissa = cmval / poweroften[exponent]; if (mantissa > 9) mantissa = 9; retval = (mantissa << 4) | exponent; *strptr = cp; return (retval); } /*% converts ascii lat/lon to unsigned encoded 32-bit number. moves pointer. */ static u_int32_t latlon2ul(const char **latlonstrptr, int *which) { const char *cp; u_int32_t retval; int deg = 0, min = 0, secs = 0, secsfrac = 0; cp = *latlonstrptr; while (isdigit((unsigned char)*cp)) deg = deg * 10 + (*cp++ - '0'); while (isspace((unsigned char)*cp)) cp++; if (!(isdigit((unsigned char)*cp))) goto fndhemi; while (isdigit((unsigned char)*cp)) min = min * 10 + (*cp++ - '0'); while (isspace((unsigned char)*cp)) cp++; if (!(isdigit((unsigned char)*cp))) goto fndhemi; while (isdigit((unsigned char)*cp)) secs = secs * 10 + (*cp++ - '0'); if (*cp == '.') { /*%< decimal seconds */ cp++; if (isdigit((unsigned char)*cp)) { secsfrac = (*cp++ - '0') * 100; if (isdigit((unsigned char)*cp)) { secsfrac += (*cp++ - '0') * 10; if (isdigit((unsigned char)*cp)) { secsfrac += (*cp++ - '0'); } } } } while (!isspace((unsigned char)*cp)) /*%< if any trailing garbage */ cp++; while (isspace((unsigned char)*cp)) cp++; fndhemi: switch (*cp) { case 'N': case 'n': case 'E': case 'e': retval = ((unsigned)1<<31) + (((((deg * 60) + min) * 60) + secs) * 1000) + secsfrac; break; case 'S': case 's': case 'W': case 'w': retval = ((unsigned)1<<31) - (((((deg * 60) + min) * 60) + secs) * 1000) - secsfrac; break; default: retval = 0; /*%< invalid value -- indicates error */ break; } switch (*cp) { case 'N': case 'n': case 'S': case 's': *which = 1; /*%< latitude */ break; case 'E': case 'e': case 'W': case 'w': *which = 2; /*%< longitude */ break; default: *which = 0; /*%< error */ break; } cp++; /*%< skip the hemisphere */ while (!isspace((unsigned char)*cp)) /*%< if any trailing garbage */ cp++; while (isspace((unsigned char)*cp)) /*%< move to next field */ cp++; *latlonstrptr = cp; return (retval); } /*% * converts a zone file representation in a string to an RDATA on-the-wire * representation. 
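 * The result is the fixed 16-octet LOC RDATA: a version byte, the
 * size and horizontal/vertical precision bytes, then latitude,
 * longitude and altitude as 32-bit values in network byte order.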
*/ int -loc_aton(ascii, binary) - const char *ascii; - u_char *binary; +loc_aton(const char *ascii, u_char *binary) { const char *cp, *maxcp; u_char *bcp; u_int32_t latit = 0, longit = 0, alt = 0; u_int32_t lltemp1 = 0, lltemp2 = 0; int altmeters = 0, altfrac = 0, altsign = 1; u_int8_t hp = 0x16; /*%< default = 1e6 cm = 10000.00m = 10km */ u_int8_t vp = 0x13; /*%< default = 1e3 cm = 10.00m */ u_int8_t siz = 0x12; /*%< default = 1e2 cm = 1.00m */ int which1 = 0, which2 = 0; cp = ascii; maxcp = cp + strlen(ascii); lltemp1 = latlon2ul(&cp, &which1); lltemp2 = latlon2ul(&cp, &which2); switch (which1 + which2) { case 3: /*%< 1 + 2, the only valid combination */ if ((which1 == 1) && (which2 == 2)) { /*%< normal case */ latit = lltemp1; longit = lltemp2; } else if ((which1 == 2) && (which2 == 1)) { /*%< reversed */ longit = lltemp1; latit = lltemp2; } else { /*%< some kind of brokenness */ return (0); } break; default: /*%< we didn't get one of each */ return (0); } /* altitude */ if (*cp == '-') { altsign = -1; cp++; } if (*cp == '+') cp++; while (isdigit((unsigned char)*cp)) altmeters = altmeters * 10 + (*cp++ - '0'); if (*cp == '.') { /*%< decimal meters */ cp++; if (isdigit((unsigned char)*cp)) { altfrac = (*cp++ - '0') * 10; if (isdigit((unsigned char)*cp)) { altfrac += (*cp++ - '0'); } } } alt = (10000000 + (altsign * (altmeters * 100 + altfrac))); while (!isspace((unsigned char)*cp) && (cp < maxcp)) /*%< if trailing garbage or m */ cp++; while (isspace((unsigned char)*cp) && (cp < maxcp)) cp++; if (cp >= maxcp) goto defaults; siz = precsize_aton(&cp); while (!isspace((unsigned char)*cp) && (cp < maxcp)) /*%< if trailing garbage or m */ cp++; while (isspace((unsigned char)*cp) && (cp < maxcp)) cp++; if (cp >= maxcp) goto defaults; hp = precsize_aton(&cp); while (!isspace((unsigned char)*cp) && (cp < maxcp)) /*%< if trailing garbage or m */ cp++; while (isspace((unsigned char)*cp) && (cp < maxcp)) cp++; if (cp >= maxcp) goto defaults; vp = precsize_aton(&cp); defaults: bcp = binary; *bcp++ = (u_int8_t) 0; /*%< version byte */ *bcp++ = siz; *bcp++ = hp; *bcp++ = vp; PUTLONG(latit,bcp); PUTLONG(longit,bcp); PUTLONG(alt,bcp); return (16); /*%< size of RR in octets */ } /*% takes an on-the-wire LOC RR and formats it in a human readable format. 
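 * The output gives degrees, minutes and seconds (with fractions) for
 * latitude and longitude, then altitude, size and the two precisions
 * in meters.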
*/ const char * -loc_ntoa(binary, ascii) - const u_char *binary; - char *ascii; +loc_ntoa(const u_char *binary, char *ascii) { static const char *error = "?"; static char tmpbuf[sizeof "1000 60 60.000 N 1000 60 60.000 W -12345678.00m 90000000.00m 90000000.00m 90000000.00m"]; const u_char *cp = binary; int latdeg, latmin, latsec, latsecfrac; int longdeg, longmin, longsec, longsecfrac; char northsouth, eastwest; const char *altsign; int altmeters, altfrac; const u_int32_t referencealt = 100000 * 100; int32_t latval, longval, altval; u_int32_t templ; u_int8_t sizeval, hpval, vpval, versionval; char *sizestr, *hpstr, *vpstr; versionval = *cp++; if (ascii == NULL) ascii = tmpbuf; if (versionval) { (void) sprintf(ascii, "; error: unknown LOC RR version"); return (ascii); } sizeval = *cp++; hpval = *cp++; vpval = *cp++; GETLONG(templ, cp); latval = (templ - ((unsigned)1<<31)); GETLONG(templ, cp); longval = (templ - ((unsigned)1<<31)); GETLONG(templ, cp); if (templ < referencealt) { /*%< below WGS 84 spheroid */ altval = referencealt - templ; altsign = "-"; } else { altval = templ - referencealt; altsign = ""; } if (latval < 0) { northsouth = 'S'; latval = -latval; } else northsouth = 'N'; latsecfrac = latval % 1000; latval = latval / 1000; latsec = latval % 60; latval = latval / 60; latmin = latval % 60; latval = latval / 60; latdeg = latval; if (longval < 0) { eastwest = 'W'; longval = -longval; } else eastwest = 'E'; longsecfrac = longval % 1000; longval = longval / 1000; longsec = longval % 60; longval = longval / 60; longmin = longval % 60; longval = longval / 60; longdeg = longval; altfrac = altval % 100; altmeters = (altval / 100); sizestr = strdup(precsize_ntoa(sizeval)); hpstr = strdup(precsize_ntoa(hpval)); vpstr = strdup(precsize_ntoa(vpval)); sprintf(ascii, "%d %.2d %.2d.%.3d %c %d %.2d %.2d.%.3d %c %s%d.%.2dm %sm %sm %sm", latdeg, latmin, latsec, latsecfrac, northsouth, longdeg, longmin, longsec, longsecfrac, eastwest, altsign, altmeters, altfrac, (sizestr != NULL) ? sizestr : error, (hpstr != NULL) ? hpstr : error, (vpstr != NULL) ? vpstr : error); if (sizestr != NULL) free(sizestr); if (hpstr != NULL) free(hpstr); if (vpstr != NULL) free(vpstr); return (ascii); } /*% Return the number of DNS hierarchy levels in the name. */ int dn_count_labels(const char *name) { int i, len, count; len = strlen(name); for (i = 0, count = 0; i < len; i++) { /* XXX need to check for \. or use named's nlabels(). */ if (name[i] == '.') count++; } /* don't count initial wildcard */ if (name[0] == '*') if (count) count--; /* don't count the null label for root. */ /* if terminating '.' not found, must adjust */ /* count to include last label */ if (len > 0 && name[len-1] != '.') count++; return (count); } /*% * Make dates expressed in seconds-since-Jan-1-1970 easy to read. * SIG records are required to be printed like this, by the Secure DNS RFC. 
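 * The string produced has the form YYYYMMDDHHMMSS, in UTC.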
*/ char * p_secstodate (u_long secs) { char *output = p_secstodate_output; time_t clock = secs; struct tm *time; #ifdef HAVE_TIME_R struct tm res; time = gmtime_r(&clock, &res); #else time = gmtime(&clock); #endif time->tm_year += 1900; time->tm_mon += 1; sprintf(output, "%04d%02d%02d%02d%02d%02d", time->tm_year, time->tm_mon, time->tm_mday, time->tm_hour, time->tm_min, time->tm_sec); return (output); } u_int16_t res_nametoclass(const char *buf, int *successp) { unsigned long result; char *endptr; int success; result = sym_ston(__p_class_syms, buf, &success); if (success) goto done; if (strncasecmp(buf, "CLASS", 5) != 0 || !isdigit((unsigned char)buf[5])) goto done; errno = 0; result = strtoul(buf + 5, &endptr, 10); if (errno == 0 && *endptr == '\0' && result <= 0xffffU) success = 1; done: if (successp) *successp = success; return (result); } u_int16_t res_nametotype(const char *buf, int *successp) { unsigned long result; char *endptr; int success; result = sym_ston(__p_type_syms, buf, &success); if (success) goto done; if (strncasecmp(buf, "type", 4) != 0 || !isdigit((unsigned char)buf[4])) goto done; errno = 0; result = strtoul(buf + 4, &endptr, 10); if (errno == 0 && *endptr == '\0' && result <= 0xffffU) success = 1; done: if (successp) *successp = success; return (result); } /* * Weak aliases for applications that use certain private entry points, * and fail to include . */ #undef fp_resstat __weak_reference(__fp_resstat, fp_resstat); #undef p_fqnname __weak_reference(__p_fqnname, p_fqnname); #undef sym_ston __weak_reference(__sym_ston, sym_ston); #undef sym_ntos __weak_reference(__sym_ntos, sym_ntos); #undef sym_ntop __weak_reference(__sym_ntop, sym_ntop); #undef dn_count_labels __weak_reference(__dn_count_labels, dn_count_labels); #undef p_secstodate __weak_reference(__p_secstodate, p_secstodate); /*! \file */ Index: projects/clang370-import/lib/libc/resolv/res_init.c =================================================================== --- projects/clang370-import/lib/libc/resolv/res_init.c (revision 288125) +++ projects/clang370-import/lib/libc/resolv/res_init.c (revision 288126) @@ -1,929 +1,928 @@ /* * Copyright (c) 1985, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Portions Copyright (c) 1993 by Digital Equipment Corporation.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies, and that
 * the name of Digital Equipment Corporation not be used in advertising or
 * publicity pertaining to distribution of the document or software without
 * specific, written prior permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL DIGITAL EQUIPMENT
 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 */

/*
 * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
 * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#if defined(LIBC_SCCS) && !defined(lint)
static const char sccsid[] = "@(#)res_init.c	8.1 (Berkeley) 6/7/93";
static const char rcsid[] = "$Id: res_init.c,v 1.26 2008/12/11 09:59:00 marka Exp $";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "port_before.h"

#include "namespace.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/time.h>

#include <netinet/in.h>
#include <arpa/inet.h>
#include <arpa/nameser.h>

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <netdb.h>

#ifndef HAVE_MD5
# include "../dst/md5.h"
#else
# ifdef SOLARIS2
#  include <sys/md5.h>
# elif _LIBC
#  include <md5.h>
# endif
#endif
#ifndef _MD5_H_
# define _MD5_H_ 1	/*%< make sure we do not include rsaref md5.h file */
#endif

#include "un-namespace.h"

#include "port_after.h"

/* ensure that sockaddr_in6 and IN6ADDR_ANY_INIT are declared / defined */
#include

#include "res_private.h"

/*% Options.  Should all be left alone.
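 * (RESOLVSORT enables the sortlist parsing below; DEBUG compiles in
 * the RES_DEBUG tracing.)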
 */
#define RESOLVSORT
#define DEBUG

#ifdef SOLARIS2
#include <sys/systeminfo.h>
#endif

static void res_setoptions(res_state, const char *, const char *);

#ifdef RESOLVSORT
static const char sort_mask[] = "/&";
#define ISSORTMASK(ch) (strchr(sort_mask, ch) != NULL)
static u_int32_t net_mask(struct in_addr);
#endif

#if !defined(isascii)	/*%< XXX - could be a function */
# define isascii(c) (!(c & 0200))
#endif

/*
 * Resolver state default settings.
 */

/*%
 * Set up default settings.  If the configuration file exists, the values
 * there will have precedence.  Otherwise, the server address is set to
 * INADDR_ANY and the default domain name comes from the gethostname().
 *
 * An interim version of this code (BIND 4.9, pre-4.4BSD) used 127.0.0.1
 * rather than INADDR_ANY ("0.0.0.0") as the default name server address
 * since it was noted that INADDR_ANY actually meant ``the first interface
 * you "ifconfig"'d at boot time'' and if this was a SLIP or PPP interface,
 * it had to be "up" in order for you to reach your own name server.  It
 * was later decided that since the recommended practice is to always
 * install local static routes through 127.0.0.1 for all your network
 * interfaces, that we could solve this problem without a code change.
 *
 * The configuration file should always be used, since it is the only way
 * to specify a default domain.  If you are running a server on your local
 * machine, you should say "nameserver 0.0.0.0" or "nameserver 127.0.0.1"
 * in the configuration file.
 *
 * Return 0 if it completes successfully, -1 on error
 */
int
res_ninit(res_state statp) {
	extern int __res_vinit(res_state, int);

	return (__res_vinit(statp, 0));
}

/*% This function has to be reachable by res_data.c but not publicly. */
int
__res_vinit(res_state statp, int preinit) {
	FILE *fp;
	char *cp, **pp;
	int n;
	char buf[BUFSIZ];
	int nserv = 0;    /*%< number of nameserver records read from file */
	int haveenv = 0;
	int havesearch = 0;
#ifdef RESOLVSORT
	int nsort = 0;
	char *net;
#endif
	int dots;
	union res_sockaddr_union u[2];
	int maxns = MAXNS;

	RES_SET_H_ERRNO(statp, 0);
	if (statp->_u._ext.ext != NULL)
		res_ndestroy(statp);

	if (!preinit) {
		statp->retrans = RES_TIMEOUT;
		statp->retry = RES_DFLRETRY;
		statp->options = RES_DEFAULT;
	}
	statp->_rnd = malloc(16);
	res_rndinit(statp);
	statp->id = res_nrandomid(statp);

	memset(u, 0, sizeof(u));
#ifdef USELOOPBACK
	u[nserv].sin.sin_addr = inet_makeaddr(IN_LOOPBACKNET, 1);
#else
	u[nserv].sin.sin_addr.s_addr = INADDR_ANY;
#endif
	u[nserv].sin.sin_family = AF_INET;
	u[nserv].sin.sin_port = htons(NAMESERVER_PORT);
#ifdef HAVE_SA_LEN
	u[nserv].sin.sin_len = sizeof(struct sockaddr_in);
#endif
	nserv++;
#ifdef HAS_INET6_STRUCTS
#ifdef USELOOPBACK
	u[nserv].sin6.sin6_addr = in6addr_loopback;
#else
	u[nserv].sin6.sin6_addr = in6addr_any;
#endif
	u[nserv].sin6.sin6_family = AF_INET6;
	u[nserv].sin6.sin6_port = htons(NAMESERVER_PORT);
#ifdef HAVE_SA_LEN
	u[nserv].sin6.sin6_len = sizeof(struct sockaddr_in6);
#endif
	nserv++;
#endif
	statp->nscount = 0;
	statp->ndots = 1;
	statp->pfcode = 0;
	statp->_vcsock = -1;
	statp->_flags = 0;
	statp->qhook = NULL;
	statp->rhook = NULL;
	statp->_u._ext.nscount = 0;
	statp->_u._ext.ext = malloc(sizeof(*statp->_u._ext.ext));
	if (statp->_u._ext.ext != NULL) {
		memset(statp->_u._ext.ext, 0, sizeof(*statp->_u._ext.ext));
		statp->_u._ext.ext->nsaddrs[0].sin = statp->nsaddr;
		strcpy(statp->_u._ext.ext->nsuffix, "ip6.arpa");
		strcpy(statp->_u._ext.ext->nsuffix2, "ip6.int");
	} else {
		/*
		 * Historically res_init() rarely, if at all, failed.
		 * Examples and applications exist which do not check
		 * our return code.
Furthermore several applications * simply call us to get the systems domainname. So * rather then immediately fail here we store the * failure, which is returned later, in h_errno. And * prevent the collection of 'nameserver' information * by setting maxns to 0. Thus applications that fail * to check our return code wont be able to make * queries anyhow. */ RES_SET_H_ERRNO(statp, NETDB_INTERNAL); maxns = 0; } #ifdef RESOLVSORT statp->nsort = 0; #endif res_setservers(statp, u, nserv); #ifdef SOLARIS2 /* * The old libresolv derived the defaultdomain from NIS/NIS+. * We want to keep this behaviour */ { char buf[sizeof(statp->defdname)], *cp; int ret; if ((ret = sysinfo(SI_SRPC_DOMAIN, buf, sizeof(buf))) > 0 && (unsigned int)ret <= sizeof(buf)) { if (buf[0] == '+') buf[0] = '.'; cp = strchr(buf, '.'); cp = (cp == NULL) ? buf : (cp + 1); strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1); statp->defdname[sizeof(statp->defdname) - 1] = '\0'; } } #endif /* SOLARIS2 */ /* Allow user to override the local domain definition */ if (issetugid() == 0 && (cp = getenv("LOCALDOMAIN")) != NULL) { (void)strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1); statp->defdname[sizeof(statp->defdname) - 1] = '\0'; haveenv++; /* * Set search list to be blank-separated strings * from rest of env value. Permits users of LOCALDOMAIN * to still have a search list, and anyone to set the * one that they want to use as an individual (even more * important now that the rfc1535 stuff restricts searches) */ cp = statp->defdname; pp = statp->dnsrch; *pp++ = cp; for (n = 0; *cp && pp < statp->dnsrch + MAXDNSRCH; cp++) { if (*cp == '\n') /*%< silly backwards compat */ break; else if (*cp == ' ' || *cp == '\t') { *cp = 0; n = 1; } else if (n) { *pp++ = cp; n = 0; havesearch = 1; } } /* null terminate last domain if there are excess */ while (*cp != '\0' && *cp != ' ' && *cp != '\t' && *cp != '\n') cp++; *cp = '\0'; *pp++ = 0; } #define MATCH(line, name) \ (!strncmp(line, name, sizeof(name) - 1) && \ (line[sizeof(name) - 1] == ' ' || \ line[sizeof(name) - 1] == '\t')) nserv = 0; if ((fp = fopen(_PATH_RESCONF, "re")) != NULL) { /* read the config file */ while (fgets(buf, sizeof(buf), fp) != NULL) { /* skip comments */ if (*buf == ';' || *buf == '#') continue; /* read default domain name */ if (MATCH(buf, "domain")) { if (haveenv) /*%< skip if have from environ */ continue; cp = buf + sizeof("domain") - 1; while (*cp == ' ' || *cp == '\t') cp++; if ((*cp == '\0') || (*cp == '\n')) continue; strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1); statp->defdname[sizeof(statp->defdname) - 1] = '\0'; if ((cp = strpbrk(statp->defdname, " \t\n")) != NULL) *cp = '\0'; havesearch = 0; continue; } /* set search list */ if (MATCH(buf, "search")) { if (haveenv) /*%< skip if have from environ */ continue; cp = buf + sizeof("search") - 1; while (*cp == ' ' || *cp == '\t') cp++; if ((*cp == '\0') || (*cp == '\n')) continue; strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1); statp->defdname[sizeof(statp->defdname) - 1] = '\0'; if ((cp = strchr(statp->defdname, '\n')) != NULL) *cp = '\0'; /* * Set search list to be blank-separated strings * on rest of line. 
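 * (Each blank is overwritten with a NUL and the token following it is
 * appended to the dnsrch[] array.)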
*/ cp = statp->defdname; pp = statp->dnsrch; *pp++ = cp; for (n = 0; *cp && pp < statp->dnsrch + MAXDNSRCH; cp++) { if (*cp == ' ' || *cp == '\t') { *cp = 0; n = 1; } else if (n) { *pp++ = cp; n = 0; } } /* null terminate last domain if there are excess */ while (*cp != '\0' && *cp != ' ' && *cp != '\t') cp++; *cp = '\0'; *pp++ = 0; havesearch = 1; continue; } /* read nameservers to query */ if (MATCH(buf, "nameserver") && nserv < maxns) { struct addrinfo hints, *ai; char sbuf[NI_MAXSERV]; const size_t minsiz = sizeof(statp->_u._ext.ext->nsaddrs[0]); cp = buf + sizeof("nameserver") - 1; while (*cp == ' ' || *cp == '\t') cp++; cp[strcspn(cp, ";# \t\n")] = '\0'; if ((*cp != '\0') && (*cp != '\n')) { memset(&hints, 0, sizeof(hints)); hints.ai_family = PF_UNSPEC; hints.ai_socktype = SOCK_DGRAM; /*dummy*/ hints.ai_flags = AI_NUMERICHOST; sprintf(sbuf, "%u", NAMESERVER_PORT); if (getaddrinfo(cp, sbuf, &hints, &ai) == 0 && ai->ai_addrlen <= minsiz) { if (statp->_u._ext.ext != NULL) { memcpy(&statp->_u._ext.ext->nsaddrs[nserv], ai->ai_addr, ai->ai_addrlen); } if (ai->ai_addrlen <= sizeof(statp->nsaddr_list[nserv])) { memcpy(&statp->nsaddr_list[nserv], ai->ai_addr, ai->ai_addrlen); } else statp->nsaddr_list[nserv].sin_family = 0; freeaddrinfo(ai); nserv++; } } continue; } #ifdef RESOLVSORT if (MATCH(buf, "sortlist")) { struct in_addr a; struct in6_addr a6; int m, i; u_char *u; struct __res_state_ext *ext = statp->_u._ext.ext; cp = buf + sizeof("sortlist") - 1; while (nsort < MAXRESOLVSORT) { while (*cp == ' ' || *cp == '\t') cp++; if (*cp == '\0' || *cp == '\n' || *cp == ';') break; net = cp; while (*cp && !ISSORTMASK(*cp) && *cp != ';' && isascii(*cp) && !isspace((unsigned char)*cp)) cp++; n = *cp; *cp = 0; if (inet_aton(net, &a)) { statp->sort_list[nsort].addr = a; if (ISSORTMASK(n)) { *cp++ = n; net = cp; while (*cp && *cp != ';' && isascii(*cp) && !isspace((unsigned char)*cp)) cp++; n = *cp; *cp = 0; if (inet_aton(net, &a)) { statp->sort_list[nsort].mask = a.s_addr; } else { statp->sort_list[nsort].mask = net_mask(statp->sort_list[nsort].addr); } } else { statp->sort_list[nsort].mask = net_mask(statp->sort_list[nsort].addr); } ext->sort_list[nsort].af = AF_INET; ext->sort_list[nsort].addr.ina = statp->sort_list[nsort].addr; ext->sort_list[nsort].mask.ina.s_addr = statp->sort_list[nsort].mask; nsort++; } else if (inet_pton(AF_INET6, net, &a6) == 1) { ext->sort_list[nsort].af = AF_INET6; ext->sort_list[nsort].addr.in6a = a6; u = (u_char *)&ext->sort_list[nsort].mask.in6a; *cp++ = n; net = cp; while (*cp && *cp != ';' && isascii(*cp) && !isspace(*cp)) cp++; m = n; n = *cp; *cp = 0; switch (m) { case '/': m = atoi(net); break; case '&': if (inet_pton(AF_INET6, net, u) == 1) { m = -1; break; } /*FALLTHROUGH*/ default: m = sizeof(struct in6_addr) * CHAR_BIT; break; } if (m >= 0) { for (i = 0; i < sizeof(struct in6_addr); i++) { if (m <= 0) { *u = 0; } else { m -= CHAR_BIT; *u = (u_char)~0; if (m < 0) *u <<= -m; } u++; } } statp->sort_list[nsort].addr.s_addr = (u_int32_t)0xffffffff; statp->sort_list[nsort].mask = (u_int32_t)0xffffffff; nsort++; } *cp = n; } continue; } #endif if (MATCH(buf, "options")) { res_setoptions(statp, buf + sizeof("options") - 1, "conf"); continue; } } if (nserv > 0) statp->nscount = nserv; #ifdef RESOLVSORT statp->nsort = nsort; #endif (void) fclose(fp); } /* * Last chance to get a nameserver. 
This should not normally * be necessary */ #ifdef NO_RESOLV_CONF if(nserv == 0) nserv = get_nameservers(statp); #endif if (statp->defdname[0] == 0 && gethostname(buf, sizeof(statp->defdname) - 1) == 0 && (cp = strchr(buf, '.')) != NULL) strcpy(statp->defdname, cp + 1); /* find components of local domain that might be searched */ if (havesearch == 0) { pp = statp->dnsrch; *pp++ = statp->defdname; *pp = NULL; dots = 0; for (cp = statp->defdname; *cp; cp++) dots += (*cp == '.'); cp = statp->defdname; while (pp < statp->dnsrch + MAXDFLSRCH) { if (dots < LOCALDOMAINPARTS) break; cp = strchr(cp, '.') + 1; /*%< we know there is one */ *pp++ = cp; dots--; } *pp = NULL; #ifdef DEBUG if (statp->options & RES_DEBUG) { printf(";; res_init()... default dnsrch list:\n"); for (pp = statp->dnsrch; *pp; pp++) printf(";;\t%s\n", *pp); printf(";;\t..END..\n"); } #endif } if (issetugid()) statp->options |= RES_NOALIASES; else if ((cp = getenv("RES_OPTIONS")) != NULL) res_setoptions(statp, cp, "env"); statp->options |= RES_INIT; return (statp->res_h_errno); } static void res_setoptions(res_state statp, const char *options, const char *source) { const char *cp = options; int i; #ifndef _LIBC struct __res_state_ext *ext = statp->_u._ext.ext; #endif #ifdef DEBUG if (statp->options & RES_DEBUG) printf(";; res_setoptions(\"%s\", \"%s\")...\n", options, source); #endif while (*cp) { /* skip leading and inner runs of spaces */ while (*cp == ' ' || *cp == '\t') cp++; /* search for and process individual options */ if (!strncmp(cp, "ndots:", sizeof("ndots:") - 1)) { i = atoi(cp + sizeof("ndots:") - 1); if (i <= RES_MAXNDOTS) statp->ndots = i; else statp->ndots = RES_MAXNDOTS; #ifdef DEBUG if (statp->options & RES_DEBUG) printf(";;\tndots=%d\n", statp->ndots); #endif } else if (!strncmp(cp, "timeout:", sizeof("timeout:") - 1)) { i = atoi(cp + sizeof("timeout:") - 1); if (i <= RES_MAXRETRANS) statp->retrans = i; else statp->retrans = RES_MAXRETRANS; #ifdef DEBUG if (statp->options & RES_DEBUG) printf(";;\ttimeout=%d\n", statp->retrans); #endif #ifdef SOLARIS2 } else if (!strncmp(cp, "retrans:", sizeof("retrans:") - 1)) { /* * For backward compatibility, 'retrans' is * supported as an alias for 'timeout', though * without an imposed maximum. */ statp->retrans = atoi(cp + sizeof("retrans:") - 1); } else if (!strncmp(cp, "retry:", sizeof("retry:") - 1)){ /* * For backward compatibility, 'retry' is * supported as an alias for 'attempts', though * without an imposed maximum. 
*/ statp->retry = atoi(cp + sizeof("retry:") - 1); #endif /* SOLARIS2 */ } else if (!strncmp(cp, "attempts:", sizeof("attempts:") - 1)){ i = atoi(cp + sizeof("attempts:") - 1); if (i <= RES_MAXRETRY) statp->retry = i; else statp->retry = RES_MAXRETRY; #ifdef DEBUG if (statp->options & RES_DEBUG) printf(";;\tattempts=%d\n", statp->retry); #endif } else if (!strncmp(cp, "debug", sizeof("debug") - 1)) { #ifdef DEBUG if (!(statp->options & RES_DEBUG)) { printf(";; res_setoptions(\"%s\", \"%s\")..\n", options, source); statp->options |= RES_DEBUG; } printf(";;\tdebug\n"); #endif } else if (!strncmp(cp, "no_tld_query", sizeof("no_tld_query") - 1) || !strncmp(cp, "no-tld-query", sizeof("no-tld-query") - 1)) { statp->options |= RES_NOTLDQUERY; } else if (!strncmp(cp, "inet6", sizeof("inet6") - 1)) { statp->options |= RES_USE_INET6; } else if (!strncmp(cp, "insecure1", sizeof("insecure1") - 1)) { statp->options |= RES_INSECURE1; } else if (!strncmp(cp, "insecure2", sizeof("insecure2") - 1)) { statp->options |= RES_INSECURE2; } else if (!strncmp(cp, "rotate", sizeof("rotate") - 1)) { statp->options |= RES_ROTATE; } else if (!strncmp(cp, "no-check-names", sizeof("no-check-names") - 1)) { statp->options |= RES_NOCHECKNAME; } #ifdef RES_USE_EDNS0 else if (!strncmp(cp, "edns0", sizeof("edns0") - 1)) { statp->options |= RES_USE_EDNS0; } #endif #ifndef _LIBC else if (!strncmp(cp, "dname", sizeof("dname") - 1)) { statp->options |= RES_USE_DNAME; } else if (!strncmp(cp, "nibble:", sizeof("nibble:") - 1)) { if (ext == NULL) goto skip; cp += sizeof("nibble:") - 1; i = MIN(strcspn(cp, " \t"), sizeof(ext->nsuffix) - 1); strncpy(ext->nsuffix, cp, i); ext->nsuffix[i] = '\0'; } else if (!strncmp(cp, "nibble2:", sizeof("nibble2:") - 1)) { if (ext == NULL) goto skip; cp += sizeof("nibble2:") - 1; i = MIN(strcspn(cp, " \t"), sizeof(ext->nsuffix2) - 1); strncpy(ext->nsuffix2, cp, i); ext->nsuffix2[i] = '\0'; } else if (!strncmp(cp, "v6revmode:", sizeof("v6revmode:") - 1)) { cp += sizeof("v6revmode:") - 1; /* "nibble" and "bitstring" used to be valid */ if (!strncmp(cp, "single", sizeof("single") - 1)) { statp->options |= RES_NO_NIBBLE2; } else if (!strncmp(cp, "both", sizeof("both") - 1)) { statp->options &= ~RES_NO_NIBBLE2; } } #endif else { /* XXX - print a warning here? */ } #ifndef _LIBC skip: #endif /* skip to next run of spaces */ while (*cp && *cp != ' ' && *cp != '\t') cp++; } } #ifdef RESOLVSORT /* XXX - should really support CIDR which means explicit masks always. */ static u_int32_t -net_mask(in) /*!< XXX - should really use system's version of this */ - struct in_addr in; +net_mask(struct in_addr in) /*!< XXX - should really use system's version of this */ { u_int32_t i = ntohl(in.s_addr); if (IN_CLASSA(i)) return (htonl(IN_CLASSA_NET)); else if (IN_CLASSB(i)) return (htonl(IN_CLASSB_NET)); return (htonl(IN_CLASSC_NET)); } #endif static u_char srnd[16]; void res_rndinit(res_state statp) { struct timeval now; u_int32_t u32; u_int16_t u16; u_char *rnd = statp->_rnd == NULL ? srnd : statp->_rnd; gettimeofday(&now, NULL); u32 = now.tv_sec; memcpy(rnd, &u32, 4); u32 = now.tv_usec; memcpy(rnd + 4, &u32, 4); u32 += now.tv_sec; memcpy(rnd + 8, &u32, 4); u16 = getpid(); memcpy(rnd + 12, &u16, 2); } u_int res_nrandomid(res_state statp) { struct timeval now; u_int16_t u16; MD5_CTX ctx; u_char *rnd = statp->_rnd == NULL ? 
srnd : statp->_rnd; gettimeofday(&now, NULL); u16 = (u_int16_t) (now.tv_sec ^ now.tv_usec); memcpy(rnd + 14, &u16, 2); #ifndef HAVE_MD5 MD5_Init(&ctx); MD5_Update(&ctx, rnd, 16); MD5_Final(rnd, &ctx); #else MD5Init(&ctx); MD5Update(&ctx, rnd, 16); MD5Final(rnd, &ctx); #endif memcpy(&u16, rnd + 14, 2); return ((u_int) u16); } /*% * This routine is for closing the socket if a virtual circuit is used and * the program wants to close it. This provides support for endhostent() * which expects to close the socket. * * This routine is not expected to be user visible. */ void res_nclose(res_state statp) { int ns; if (statp->_vcsock >= 0) { (void) _close(statp->_vcsock); statp->_vcsock = -1; statp->_flags &= ~(RES_F_VC | RES_F_CONN); } for (ns = 0; ns < statp->_u._ext.nscount; ns++) { if (statp->_u._ext.nssocks[ns] != -1) { (void) _close(statp->_u._ext.nssocks[ns]); statp->_u._ext.nssocks[ns] = -1; } } } void res_ndestroy(res_state statp) { res_nclose(statp); if (statp->_u._ext.ext != NULL) { free(statp->_u._ext.ext); statp->_u._ext.ext = NULL; } if (statp->_rnd != NULL) { free(statp->_rnd); statp->_rnd = NULL; } statp->options &= ~RES_INIT; } #ifndef _LIBC const char * res_get_nibblesuffix(res_state statp) { if (statp->_u._ext.ext) return (statp->_u._ext.ext->nsuffix); return ("ip6.arpa"); } const char * res_get_nibblesuffix2(res_state statp) { if (statp->_u._ext.ext) return (statp->_u._ext.ext->nsuffix2); return ("ip6.int"); } #endif void res_setservers(res_state statp, const union res_sockaddr_union *set, int cnt) { int i, nserv; size_t size; /* close open servers */ res_nclose(statp); /* cause rtt times to be forgotten */ statp->_u._ext.nscount = 0; nserv = 0; for (i = 0; i < cnt && nserv < MAXNS; i++) { switch (set->sin.sin_family) { case AF_INET: size = sizeof(set->sin); if (statp->_u._ext.ext) memcpy(&statp->_u._ext.ext->nsaddrs[nserv], &set->sin, size); if (size <= sizeof(statp->nsaddr_list[nserv])) memcpy(&statp->nsaddr_list[nserv], &set->sin, size); else statp->nsaddr_list[nserv].sin_family = 0; nserv++; break; #ifdef HAS_INET6_STRUCTS case AF_INET6: size = sizeof(set->sin6); if (statp->_u._ext.ext) memcpy(&statp->_u._ext.ext->nsaddrs[nserv], &set->sin6, size); if (size <= sizeof(statp->nsaddr_list[nserv])) memcpy(&statp->nsaddr_list[nserv], &set->sin6, size); else statp->nsaddr_list[nserv].sin_family = 0; nserv++; break; #endif default: break; } set++; } statp->nscount = nserv; } int res_getservers(res_state statp, union res_sockaddr_union *set, int cnt) { int i; size_t size; u_int16_t family; for (i = 0; i < statp->nscount && i < cnt; i++) { if (statp->_u._ext.ext) family = statp->_u._ext.ext->nsaddrs[i].sin.sin_family; else family = statp->nsaddr_list[i].sin_family; switch (family) { case AF_INET: size = sizeof(set->sin); if (statp->_u._ext.ext) memcpy(&set->sin, &statp->_u._ext.ext->nsaddrs[i], size); else memcpy(&set->sin, &statp->nsaddr_list[i], size); break; #ifdef HAS_INET6_STRUCTS case AF_INET6: size = sizeof(set->sin6); if (statp->_u._ext.ext) memcpy(&set->sin6, &statp->_u._ext.ext->nsaddrs[i], size); else memcpy(&set->sin6, &statp->nsaddr_list[i], size); break; #endif default: set->sin.sin_family = 0; break; } set++; } return (statp->nscount); } /*! 
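
res_setservers() and res_getservers() just above are the public way to install and inspect the server list without poking at nsaddr_list directly. A hedged usage sketch, assuming FreeBSD's res_n* API from <resolv.h> (192.0.2.53 is a documentation address; link with -lresolv where the platform needs it):

#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <resolv.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct __res_state st;
	union res_sockaddr_union set[1], get[MAXNS];
	char buf[INET_ADDRSTRLEN];
	int i, n;

	memset(&st, 0, sizeof(st));
	if (res_ninit(&st) != 0)
		return (1);

	/* Install a single IPv4 server (documentation address). */
	memset(set, 0, sizeof(set));
	set[0].sin.sin_family = AF_INET;
	set[0].sin.sin_port = htons(53);
	inet_pton(AF_INET, "192.0.2.53", &set[0].sin.sin_addr);
	res_setservers(&st, set, 1);

	/* Read the list back; only AF_INET entries are printed. */
	n = res_getservers(&st, get, MAXNS);
	for (i = 0; i < n; i++)
		if (get[i].sin.sin_family == AF_INET)
			printf("ns[%d] = %s\n", i, inet_ntop(AF_INET,
			    &get[i].sin.sin_addr, buf, sizeof(buf)));
	res_ndestroy(&st);
	return (0);
}

Note that res_setservers() first calls res_nclose(), so any cached round-trip timing data is deliberately forgotten when the list changes.
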
\file */ Index: projects/clang370-import/lib/libc/resolv/res_mkupdate.c =================================================================== --- projects/clang370-import/lib/libc/resolv/res_mkupdate.c (revision 288125) +++ projects/clang370-import/lib/libc/resolv/res_mkupdate.c (revision 288126) @@ -1,1197 +1,1197 @@ /* * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC") * Copyright (c) 1996-1999 by Internet Software Consortium. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /*! \file * \brief * Based on the Dynamic DNS reference implementation by Viraj Bais * <viraj_bais@ccm.fm.intel.com> */ #if !defined(lint) && !defined(SABER) static const char rcsid[] = "$Id: res_mkupdate.c,v 1.10 2008/12/11 09:59:00 marka Exp $"; #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include "port_before.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _LIBC #include #endif #include "port_after.h" /* Options. Leave them on. */ #define DEBUG #define MAXPORT 1024 static int getnum_str(u_char **, u_char *); static int gethexnum_str(u_char **, u_char *); static int getword_str(char *, int, u_char **, u_char *); static int getstr_str(char *, int, u_char **, u_char *); #define ShrinkBuffer(x) if ((buflen -= x) < 0) return (-2); /* Forward. */ #ifdef _LIBC static #endif int res_protocolnumber(const char *); #ifdef _LIBC static #endif int res_servicenumber(const char *); /*% * Form update packets. * Returns the size of the resulting packet if no error * * On error, * returns *\li -1 if error in reading a word/number in rdata * portion for update packets *\li -2 if length of buffer passed is insufficient *\li -3 if zone section is not the first section in * the linked list, or section order has a problem *\li -4 on a number overflow *\li -5 unknown operation or no records */ int res_nmkupdate(res_state statp, ns_updrec *rrecp_in, u_char *buf, int buflen) { ns_updrec *rrecp_start = rrecp_in; HEADER *hp; u_char *cp, *sp2, *startp, *endp; int n, i, soanum, multiline; ns_updrec *rrecp; struct in_addr ina; struct in6_addr in6a; char buf2[MAXDNAME]; u_char buf3[MAXDNAME]; int section, numrrs = 0, counts[ns_s_max]; u_int16_t rtype, rclass; u_int32_t n1, rttl; u_char *dnptrs[20], **dpp, **lastdnptr; #ifndef _LIBC int siglen; #endif int keylen, certlen; /* * Initialize header fields. 
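
For callers, the negative return codes documented above are worth mapping to text. A small illustrative helper, not part of the library:

#include <stdio.h>

/*
 * Map res_nmkupdate()'s documented return values to messages.  The
 * codes follow the comment above; a positive value is the packet
 * size in bytes.
 */
static const char *
mkupdate_strerror(int rc)
{
	switch (rc) {
	case -1: return ("bad word/number in rdata");
	case -2: return ("output buffer too small");
	case -3: return ("zone section out of order");
	case -4: return ("number overflow");
	case -5: return ("unknown operation or no records");
	default: return (rc > 0 ? "success" : "unknown error");
	}
}

int
main(void)
{
	printf("-2 -> %s\n", mkupdate_strerror(-2));
	return (0);
}
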
*/ if ((buf == NULL) || (buflen < HFIXEDSZ)) return (-1); memset(buf, 0, HFIXEDSZ); hp = (HEADER *) buf; statp->id = res_nrandomid(statp); hp->id = htons(statp->id); hp->opcode = ns_o_update; hp->rcode = NOERROR; cp = buf + HFIXEDSZ; buflen -= HFIXEDSZ; dpp = dnptrs; *dpp++ = buf; *dpp++ = NULL; lastdnptr = dnptrs + sizeof dnptrs / sizeof dnptrs[0]; if (rrecp_start == NULL) return (-5); else if (rrecp_start->r_section != S_ZONE) return (-3); memset(counts, 0, sizeof counts); for (rrecp = rrecp_start; rrecp; rrecp = NEXT(rrecp, r_glink)) { numrrs++; section = rrecp->r_section; if (section < 0 || section >= ns_s_max) return (-1); counts[section]++; for (i = section + 1; i < ns_s_max; i++) if (counts[i]) return (-3); rtype = rrecp->r_type; rclass = rrecp->r_class; rttl = rrecp->r_ttl; /* overload class and type */ if (section == S_PREREQ) { rttl = 0; switch (rrecp->r_opcode) { case YXDOMAIN: rclass = C_ANY; rtype = T_ANY; rrecp->r_size = 0; break; case NXDOMAIN: rclass = C_NONE; rtype = T_ANY; rrecp->r_size = 0; break; case NXRRSET: rclass = C_NONE; rrecp->r_size = 0; break; case YXRRSET: if (rrecp->r_size == 0) rclass = C_ANY; break; default: fprintf(stderr, "res_mkupdate: incorrect opcode: %d\n", rrecp->r_opcode); fflush(stderr); return (-1); } } else if (section == S_UPDATE) { switch (rrecp->r_opcode) { case DELETE: rclass = rrecp->r_size == 0 ? C_ANY : C_NONE; break; case ADD: break; default: fprintf(stderr, "res_mkupdate: incorrect opcode: %d\n", rrecp->r_opcode); fflush(stderr); return (-1); } } /* * XXX appending default domain to owner name is omitted, * fqdn must be provided */ if ((n = dn_comp(rrecp->r_dname, cp, buflen, dnptrs, lastdnptr)) < 0) return (-1); cp += n; ShrinkBuffer(n + 2*INT16SZ); PUTSHORT(rtype, cp); PUTSHORT(rclass, cp); if (section == S_ZONE) { if (numrrs != 1 || rrecp->r_type != T_SOA) return (-3); continue; } ShrinkBuffer(INT32SZ + INT16SZ); PUTLONG(rttl, cp); sp2 = cp; /*%< save pointer to length byte */ cp += INT16SZ; if (rrecp->r_size == 0) { if (section == S_UPDATE && rclass != C_ANY) return (-1); else { PUTSHORT(0, sp2); continue; } } startp = rrecp->r_data; endp = startp + rrecp->r_size - 1; /* XXX this should be done centrally. 
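
Every emission in res_nmkupdate() follows the same discipline: debit the remaining space with ShrinkBuffer() before writing, then advance cp. A self-contained sketch of that guard (SHRINK and emit_fixed are illustrative names):

#include <stdio.h>

#define INT16SZ 2
#define INT32SZ 4

/* Same pattern as ShrinkBuffer(): debit the budget, fail with -2 on underflow. */
#define SHRINK(rem, x) do { if (((rem) -= (x)) < 0) return (-2); } while (0)

/* Append a 16-bit and a 32-bit big-endian value, bounds-checked. */
static int
emit_fixed(unsigned char *cp, int buflen, unsigned short s, unsigned long l)
{
	unsigned char *p = cp;

	SHRINK(buflen, INT16SZ);
	*p++ = (s >> 8) & 0xff;		/* PUTSHORT equivalent */
	*p++ = s & 0xff;

	SHRINK(buflen, INT32SZ);
	*p++ = (l >> 24) & 0xff;	/* PUTLONG equivalent */
	*p++ = (l >> 16) & 0xff;
	*p++ = (l >> 8) & 0xff;
	*p++ = l & 0xff;

	return (int)(p - cp);		/* bytes written */
}

int
main(void)
{
	unsigned char buf[8];

	printf("wrote %d bytes\n", emit_fixed(buf, sizeof(buf), 0x0001, 3600));
	printf("tight buffer -> %d\n", emit_fixed(buf, 3, 0x0001, 3600));
	return (0);
}

Checking before each field, rather than once up front, is what lets the real routine handle variable-length rdata without precomputing record sizes.
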
*/ switch (rrecp->r_type) { case T_A: if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); if (!inet_aton(buf2, &ina)) return (-1); n1 = ntohl(ina.s_addr); ShrinkBuffer(INT32SZ); PUTLONG(n1, cp); break; case T_CNAME: case T_MB: case T_MG: case T_MR: case T_NS: case T_PTR: case ns_t_dname: if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); n = dn_comp(buf2, cp, buflen, dnptrs, lastdnptr); if (n < 0) return (-1); cp += n; ShrinkBuffer(n); break; case T_MINFO: case T_SOA: case T_RP: for (i = 0; i < 2; i++) { if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); n = dn_comp(buf2, cp, buflen, dnptrs, lastdnptr); if (n < 0) return (-1); cp += n; ShrinkBuffer(n); } if (rrecp->r_type == T_SOA) { ShrinkBuffer(5 * INT32SZ); while (isspace(*startp) || !*startp) startp++; if (*startp == '(') { multiline = 1; startp++; } else multiline = 0; /* serial, refresh, retry, expire, minimum */ for (i = 0; i < 5; i++) { soanum = getnum_str(&startp, endp); if (soanum < 0) return (-1); PUTLONG(soanum, cp); } if (multiline) { while (isspace(*startp) || !*startp) startp++; if (*startp != ')') return (-1); } } break; case T_MX: case T_AFSDB: case T_RT: n = getnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(INT16SZ); PUTSHORT(n, cp); if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); n = dn_comp(buf2, cp, buflen, dnptrs, lastdnptr); if (n < 0) return (-1); cp += n; ShrinkBuffer(n); break; case T_SRV: n = getnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(INT16SZ); PUTSHORT(n, cp); n = getnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(INT16SZ); PUTSHORT(n, cp); n = getnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(INT16SZ); PUTSHORT(n, cp); if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); n = dn_comp(buf2, cp, buflen, NULL, NULL); if (n < 0) return (-1); cp += n; ShrinkBuffer(n); break; case T_PX: n = getnum_str(&startp, endp); if (n < 0) return (-1); PUTSHORT(n, cp); ShrinkBuffer(INT16SZ); for (i = 0; i < 2; i++) { if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); n = dn_comp(buf2, cp, buflen, dnptrs, lastdnptr); if (n < 0) return (-1); cp += n; ShrinkBuffer(n); } break; case T_WKS: { char bm[MAXPORT/8]; unsigned int maxbm = 0; if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); if (!inet_aton(buf2, &ina)) return (-1); n1 = ntohl(ina.s_addr); ShrinkBuffer(INT32SZ); PUTLONG(n1, cp); if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); if ((i = res_protocolnumber(buf2)) < 0) return (-1); ShrinkBuffer(1); *cp++ = i & 0xff; for (i = 0; i < MAXPORT/8 ; i++) bm[i] = 0; while (getword_str(buf2, sizeof buf2, &startp, endp)) { if ((n = res_servicenumber(buf2)) <= 0) return (-1); if (n < MAXPORT) { bm[n/8] |= (0x80>>(n%8)); if ((unsigned)n > maxbm) maxbm = n; } else return (-1); } maxbm = maxbm/8 + 1; ShrinkBuffer(maxbm); memcpy(cp, bm, maxbm); cp += maxbm; break; } case T_HINFO: for (i = 0; i < 2; i++) { if ((n = getstr_str(buf2, sizeof buf2, &startp, endp)) < 0) return (-1); if (n > 255) return (-1); ShrinkBuffer(n+1); *cp++ = n; memcpy(cp, buf2, n); cp += n; } break; case T_TXT: for (;;) { if ((n = getstr_str(buf2, sizeof buf2, &startp, endp)) < 0) { if (cp != (sp2 + INT16SZ)) break; return (-1); } if (n > 255) return (-1); ShrinkBuffer(n+1); *cp++ = n; memcpy(cp, buf2, n); cp += n; } break; case T_X25: /* RFC1183 */ if ((n = getstr_str(buf2, sizeof buf2, &startp, endp)) < 0) return (-1); if (n > 255) return (-1); ShrinkBuffer(n+1); *cp++ = n; memcpy(cp, buf2, n); 
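
The T_TXT, T_HINFO, and T_X25 cases here all emit RFC 1035 <character-string>s: one length octet followed by at most 255 bytes. As a standalone sketch (put_charstr is an illustrative name):

#include <stdio.h>
#include <string.h>

/*
 * Encode one DNS <character-string> (RFC 1035): a length octet
 * followed by at most 255 bytes, as the T_TXT/T_HINFO cases do.
 * Returns bytes written, or -1 if the string is too long or the
 * buffer too small.
 */
static int
put_charstr(unsigned char *dst, size_t dstlen, const char *s)
{
	size_t n = strlen(s);

	if (n > 255 || dstlen < n + 1)
		return (-1);
	*dst++ = (unsigned char)n;	/* length octet */
	memcpy(dst, s, n);
	return (int)(n + 1);
}

int
main(void)
{
	unsigned char rdata[256];
	int n = put_charstr(rdata, sizeof(rdata), "v=spf1 -all");

	printf("encoded %d bytes, length octet %u\n", n, rdata[0]);
	return (0);
}

The 255-byte cap is why the T_TXT loop above keeps appending separate strings instead of one long one.
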
cp += n; break; case T_ISDN: /* RFC1183 */ if ((n = getstr_str(buf2, sizeof buf2, &startp, endp)) < 0) return (-1); if ((n > 255) || (n == 0)) return (-1); ShrinkBuffer(n+1); *cp++ = n; memcpy(cp, buf2, n); cp += n; if ((n = getstr_str(buf2, sizeof buf2, &startp, endp)) < 0) n = 0; if (n > 255) return (-1); ShrinkBuffer(n+1); *cp++ = n; memcpy(cp, buf2, n); cp += n; break; case T_NSAP: if ((n = inet_nsap_addr((char *)startp, (u_char *)buf2, sizeof(buf2))) != 0) { ShrinkBuffer(n); memcpy(cp, buf2, n); cp += n; } else { return (-1); } break; case T_LOC: if ((n = loc_aton((char *)startp, (u_char *)buf2)) != 0) { ShrinkBuffer(n); memcpy(cp, buf2, n); cp += n; } else return (-1); break; case ns_t_sig: #ifdef _LIBC return (-1); #else { int sig_type, success, dateerror; u_int32_t exptime, timesigned; /* type */ if ((n = getword_str(buf2, sizeof buf2, &startp, endp)) < 0) return (-1); sig_type = sym_ston(__p_type_syms, buf2, &success); if (!success || sig_type == ns_t_any) return (-1); ShrinkBuffer(INT16SZ); PUTSHORT(sig_type, cp); /* alg */ n = getnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(1); *cp++ = n; /* labels */ n = getnum_str(&startp, endp); if (n <= 0 || n > 255) return (-1); ShrinkBuffer(1); *cp++ = n; /* ottl & expire */ if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); exptime = ns_datetosecs(buf2, &dateerror); if (!dateerror) { ShrinkBuffer(INT32SZ); PUTLONG(rttl, cp); } else { char *ulendp; u_int32_t ottl; errno = 0; ottl = strtoul(buf2, &ulendp, 10); if (errno != 0 || (ulendp != NULL && *ulendp != '\0')) return (-1); ShrinkBuffer(INT32SZ); PUTLONG(ottl, cp); if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); exptime = ns_datetosecs(buf2, &dateerror); if (dateerror) return (-1); } /* expire */ ShrinkBuffer(INT32SZ); PUTLONG(exptime, cp); /* timesigned */ if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); timesigned = ns_datetosecs(buf2, &dateerror); if (!dateerror) { ShrinkBuffer(INT32SZ); PUTLONG(timesigned, cp); } else return (-1); /* footprint */ n = getnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(INT16SZ); PUTSHORT(n, cp); /* signer name */ if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); n = dn_comp(buf2, cp, buflen, dnptrs, lastdnptr); if (n < 0) return (-1); cp += n; ShrinkBuffer(n); /* sig */ if ((n = getword_str(buf2, sizeof buf2, &startp, endp)) < 0) return (-1); siglen = b64_pton(buf2, buf3, sizeof(buf3)); if (siglen < 0) return (-1); ShrinkBuffer(siglen); memcpy(cp, buf3, siglen); cp += siglen; break; } #endif case ns_t_key: /* flags */ n = gethexnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(INT16SZ); PUTSHORT(n, cp); /* proto */ n = getnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(1); *cp++ = n; /* alg */ n = getnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(1); *cp++ = n; /* key */ if ((n = getword_str(buf2, sizeof buf2, &startp, endp)) < 0) return (-1); keylen = b64_pton(buf2, buf3, sizeof(buf3)); if (keylen < 0) return (-1); ShrinkBuffer(keylen); memcpy(cp, buf3, keylen); cp += keylen; break; case ns_t_nxt: { int success, nxt_type; u_char data[32]; int maxtype; /* next name */ if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); n = dn_comp(buf2, cp, buflen, NULL, NULL); if (n < 0) return (-1); cp += n; ShrinkBuffer(n); maxtype = 0; memset(data, 0, sizeof data); for (;;) { if (!getword_str(buf2, sizeof buf2, &startp, endp)) break; nxt_type = sym_ston(__p_type_syms, buf2, &success); if (!success || !ns_t_rr_p(nxt_type)) 
return (-1); NS_NXT_BIT_SET(nxt_type, data); if (nxt_type > maxtype) maxtype = nxt_type; } n = maxtype/NS_NXT_BITS+1; ShrinkBuffer(n); memcpy(cp, data, n); cp += n; break; } case ns_t_cert: /* type */ n = getnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(INT16SZ); PUTSHORT(n, cp); /* key tag */ n = getnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(INT16SZ); PUTSHORT(n, cp); /* alg */ n = getnum_str(&startp, endp); if (n < 0) return (-1); ShrinkBuffer(1); *cp++ = n; /* cert */ if ((n = getword_str(buf2, sizeof buf2, &startp, endp)) < 0) return (-1); certlen = b64_pton(buf2, buf3, sizeof(buf3)); if (certlen < 0) return (-1); ShrinkBuffer(certlen); memcpy(cp, buf3, certlen); cp += certlen; break; case ns_t_aaaa: if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); if (inet_pton(AF_INET6, buf2, &in6a) <= 0) return (-1); ShrinkBuffer(NS_IN6ADDRSZ); memcpy(cp, &in6a, NS_IN6ADDRSZ); cp += NS_IN6ADDRSZ; break; case ns_t_naptr: /* Order Preference Flags Service Replacement Regexp */ /* Order */ n = getnum_str(&startp, endp); if (n < 0 || n > 65535) return (-1); ShrinkBuffer(INT16SZ); PUTSHORT(n, cp); /* Preference */ n = getnum_str(&startp, endp); if (n < 0 || n > 65535) return (-1); ShrinkBuffer(INT16SZ); PUTSHORT(n, cp); /* Flags */ if ((n = getstr_str(buf2, sizeof buf2, &startp, endp)) < 0) { return (-1); } if (n > 255) return (-1); ShrinkBuffer(n+1); *cp++ = n; memcpy(cp, buf2, n); cp += n; /* Service Classes */ if ((n = getstr_str(buf2, sizeof buf2, &startp, endp)) < 0) { return (-1); } if (n > 255) return (-1); ShrinkBuffer(n+1); *cp++ = n; memcpy(cp, buf2, n); cp += n; /* Pattern */ if ((n = getstr_str(buf2, sizeof buf2, &startp, endp)) < 0) { return (-1); } if (n > 255) return (-1); ShrinkBuffer(n+1); *cp++ = n; memcpy(cp, buf2, n); cp += n; /* Replacement */ if (!getword_str(buf2, sizeof buf2, &startp, endp)) return (-1); n = dn_comp(buf2, cp, buflen, NULL, NULL); if (n < 0) return (-1); cp += n; ShrinkBuffer(n); break; default: return (-1); } /*switch*/ n = (u_int16_t)((cp - sp2) - INT16SZ); PUTSHORT(n, sp2); } /*for*/ hp->qdcount = htons(counts[0]); hp->ancount = htons(counts[1]); hp->nscount = htons(counts[2]); hp->arcount = htons(counts[3]); return (cp - buf); } /*% * Get a whitespace delimited word from a string (not file) * into buf. modify the start pointer to point after the * word in the string. */ static int getword_str(char *buf, int size, u_char **startpp, u_char *endp) { char *cp; int c; for (cp = buf; *startpp <= endp; ) { c = **startpp; if (isspace(c) || c == '\0') { if (cp != buf) /*%< trailing whitespace */ break; else { /*%< leading whitespace */ (*startpp)++; continue; } } (*startpp)++; if (cp >= buf+size-1) break; *cp++ = (u_char)c; } *cp = '\0'; return (cp != buf); } /*% * get a white spae delimited string from memory. Process quoted strings * and \\DDD escapes. Return length or -1 on error. Returned string may * contain nulls. 
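
getstr_str(), defined next, decodes zone-file escapes: a backslash followed by three decimal digits becomes one byte, and any other escaped character is taken literally. A simplified standalone decoder (unescape is an illustrative name; the real routine also tracks quoting and word boundaries):

#include <ctype.h>
#include <stdio.h>

/*
 * Decode zone-file style escapes: "\DDD" (three decimal digits)
 * becomes one byte, "\X" becomes X.  Returns the output length;
 * the output may contain NULs, as the comment above notes.
 */
static int
unescape(const char *src, unsigned char *dst)
{
	unsigned char *out = dst;

	while (*src != '\0') {
		if (*src == '\\' && isdigit((unsigned char)src[1]) &&
		    isdigit((unsigned char)src[2]) &&
		    isdigit((unsigned char)src[3])) {
			*out++ = (unsigned char)((src[1] - '0') * 100 +
			    (src[2] - '0') * 10 + (src[3] - '0'));
			src += 4;
		} else if (*src == '\\' && src[1] != '\0') {
			*out++ = (unsigned char)src[1];	/* literal next char */
			src += 2;
		} else {
			*out++ = (unsigned char)*src++;
		}
	}
	return (int)(out - dst);
}

int
main(void)
{
	unsigned char buf[64];
	int n = unescape("a\\046b\\\\c", buf);	/* \046 is '.' */

	printf("%d bytes: %.*s\n", n, n, (const char *)buf);
	return (0);
}
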
*/ static char digits[] = "0123456789"; static int getstr_str(char *buf, int size, u_char **startpp, u_char *endp) { char *cp; int c, c1 = 0; int inquote = 0; int seen_quote = 0; int escape = 0; int dig = 0; for (cp = buf; *startpp <= endp; ) { if ((c = **startpp) == '\0') break; /* leading white space */ if ((cp == buf) && !seen_quote && isspace(c)) { (*startpp)++; continue; } switch (c) { case '\\': if (!escape) { escape = 1; dig = 0; c1 = 0; (*startpp)++; continue; } goto do_escape; case '"': if (!escape) { inquote = !inquote; seen_quote = 1; (*startpp)++; continue; } /* fall through */ default: do_escape: if (escape) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': c1 = c1 * 10 + (strchr(digits, c) - digits); if (++dig == 3) { c = c1 &0xff; break; } (*startpp)++; continue; } escape = 0; } else if (!inquote && isspace(c)) goto done; if (cp >= buf+size-1) goto done; *cp++ = (u_char)c; (*startpp)++; } } done: *cp = '\0'; return ((cp == buf)? (seen_quote? 0: -1): (cp - buf)); } /*% * Get a whitespace delimited base 16 number from a string (not file) into buf * update the start pointer to point after the number in the string. */ static int gethexnum_str(u_char **startpp, u_char *endp) { int c, n; int seendigit = 0; int m = 0; if (*startpp + 2 >= endp || strncasecmp((char *)*startpp, "0x", 2) != 0) return getnum_str(startpp, endp); (*startpp)+=2; for (n = 0; *startpp <= endp; ) { c = **startpp; if (isspace(c) || c == '\0') { if (seendigit) /*%< trailing whitespace */ break; else { /*%< leading whitespace */ (*startpp)++; continue; } } if (c == ';') { while ((*startpp <= endp) && ((c = **startpp) != '\n')) (*startpp)++; if (seendigit) break; continue; } if (!isxdigit(c)) { if (c == ')' && seendigit) { (*startpp)--; break; } return (-1); } (*startpp)++; if (isdigit(c)) n = n * 16 + (c - '0'); else n = n * 16 + (tolower(c) - 'a' + 10); seendigit = 1; } return (n + m); } /*% * Get a whitespace delimited base 10 number from a string (not file) into buf * update the start pointer to point after the number in the string. */ static int getnum_str(u_char **startpp, u_char *endp) { int c, n; int seendigit = 0; int m = 0; for (n = 0; *startpp <= endp; ) { c = **startpp; if (isspace(c) || c == '\0') { if (seendigit) /*%< trailing whitespace */ break; else { /*%< leading whitespace */ (*startpp)++; continue; } } if (c == ';') { while ((*startpp <= endp) && ((c = **startpp) != '\n')) (*startpp)++; if (seendigit) break; continue; } if (!isdigit(c)) { if (c == ')' && seendigit) { (*startpp)--; break; } return (-1); } (*startpp)++; n = n * 10 + (c - '0'); seendigit = 1; } return (n + m); } /*% * Allocate a resource record buffer & save rr info. */ ns_updrec * res_mkupdrec(int section, const char *dname, u_int class, u_int type, u_long ttl) { ns_updrec *rrecp = (ns_updrec *)calloc(1, sizeof(ns_updrec)); if (!rrecp || !(rrecp->r_dname = strdup(dname))) { if (rrecp) free((char *)rrecp); return (NULL); } INIT_LINK(rrecp, r_link); INIT_LINK(rrecp, r_glink); rrecp->r_class = (ns_class)class; rrecp->r_type = (ns_type)type; rrecp->r_ttl = ttl; rrecp->r_section = (ns_sect)section; return (rrecp); } /*% * Free a resource record buffer created by res_mkupdrec. */ void res_freeupdrec(ns_updrec *rrecp) { /* Note: freeing r_dp is the caller's responsibility. 
*/ if (rrecp->r_dname != NULL) free(rrecp->r_dname); free(rrecp); } struct valuelist { struct valuelist * next; struct valuelist * prev; char * name; char * proto; int port; }; static struct valuelist *servicelist, *protolist; static void -res_buildservicelist() { +res_buildservicelist(void) { struct servent *sp; struct valuelist *slp; #ifdef MAYBE_HESIOD setservent(0); #else setservent(1); #endif while ((sp = getservent()) != NULL) { slp = (struct valuelist *)malloc(sizeof(struct valuelist)); if (!slp) break; slp->name = strdup(sp->s_name); slp->proto = strdup(sp->s_proto); if ((slp->name == NULL) || (slp->proto == NULL)) { if (slp->name) free(slp->name); if (slp->proto) free(slp->proto); free(slp); break; } slp->port = ntohs((u_int16_t)sp->s_port); /*%< host byt order */ slp->next = servicelist; slp->prev = NULL; if (servicelist) servicelist->prev = slp; servicelist = slp; } endservent(); } #ifndef _LIBC void res_destroyservicelist() { struct valuelist *slp, *slp_next; for (slp = servicelist; slp != NULL; slp = slp_next) { slp_next = slp->next; free(slp->name); free(slp->proto); free(slp); } servicelist = (struct valuelist *)0; } #endif #ifdef _LIBC static #endif void res_buildprotolist(void) { struct protoent *pp; struct valuelist *slp; #ifdef MAYBE_HESIOD setprotoent(0); #else setprotoent(1); #endif while ((pp = getprotoent()) != NULL) { slp = (struct valuelist *)malloc(sizeof(struct valuelist)); if (!slp) break; slp->name = strdup(pp->p_name); if (slp->name == NULL) { free(slp); break; } slp->port = pp->p_proto; /*%< host byte order */ slp->next = protolist; slp->prev = NULL; if (protolist) protolist->prev = slp; protolist = slp; } endprotoent(); } #ifndef _LIBC void res_destroyprotolist(void) { struct valuelist *plp, *plp_next; for (plp = protolist; plp != NULL; plp = plp_next) { plp_next = plp->next; free(plp->name); free(plp); } protolist = (struct valuelist *)0; } #endif static int findservice(const char *s, struct valuelist **list) { struct valuelist *lp = *list; int n; for (; lp != NULL; lp = lp->next) if (strcasecmp(lp->name, s) == 0) { if (lp != *list) { lp->prev->next = lp->next; if (lp->next) lp->next->prev = lp->prev; (*list)->prev = lp; lp->next = *list; *list = lp; } return (lp->port); /*%< host byte order */ } if (sscanf(s, "%d", &n) != 1 || n <= 0) n = -1; return (n); } /*% * Convert service name or (ascii) number to int. */ #ifdef _LIBC static #endif int res_servicenumber(const char *p) { if (servicelist == (struct valuelist *)0) res_buildservicelist(); return (findservice(p, &servicelist)); } /*% * Convert protocol name or (ascii) number to int. */ #ifdef _LIBC static #endif int res_protocolnumber(const char *p) { if (protolist == (struct valuelist *)0) res_buildprotolist(); return (findservice(p, &protolist)); } #ifndef _LIBC static struct servent * cgetservbyport(u_int16_t port, const char *proto) { /*%< Host byte order. */ struct valuelist **list = &servicelist; struct valuelist *lp = *list; static struct servent serv; port = ntohs(port); for (; lp != NULL; lp = lp->next) { if (port != (u_int16_t)lp->port) /*%< Host byte order. */ continue; if (strcasecmp(lp->proto, proto) == 0) { if (lp != *list) { lp->prev->next = lp->next; if (lp->next) lp->next->prev = lp->prev; (*list)->prev = lp; lp->next = *list; *list = lp; } serv.s_name = lp->name; serv.s_port = htons((u_int16_t)lp->port); serv.s_proto = lp->proto; return (&serv); } } return (0); } static struct protoent * cgetprotobynumber(int proto) { /*%< Host byte order. 
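
findservice() above uses a move-to-front heuristic: a hit is unlinked and pushed to the list head, so repeated lookups of the same name stay cheap. The same trick in isolation (this sketch also clears the moved node's prev pointer, which the original leaves stale):

#include <stdio.h>
#include <strings.h>

/* Doubly-linked cache node, shaped like struct valuelist above. */
struct node {
	struct node *next, *prev;
	const char *name;
	int value;
};

/*
 * Look 'name' up and, on a hit, move the node to the list head --
 * the same move-to-front step findservice() performs.
 */
static int
mtf_find(struct node **list, const char *name)
{
	struct node *lp;

	for (lp = *list; lp != NULL; lp = lp->next)
		if (strcasecmp(lp->name, name) == 0) {
			if (lp != *list) {
				lp->prev->next = lp->next;
				if (lp->next)
					lp->next->prev = lp->prev;
				lp->prev = NULL;
				(*list)->prev = lp;
				lp->next = *list;
				*list = lp;
			}
			return (lp->value);
		}
	return (-1);
}

int
main(void)
{
	struct node c = { NULL, NULL, "smtp", 25 };
	struct node b = { &c, NULL, "domain", 53 };
	struct node a = { &b, NULL, "http", 80 };
	struct node *list = &a;

	b.prev = &a;
	c.prev = &b;
	printf("domain = %d\n", mtf_find(&list, "domain"));
	printf("head is now %s\n", list->name);	/* "domain" */
	return (0);
}
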
*/ struct valuelist **list = &protolist; struct valuelist *lp = *list; static struct protoent prot; for (; lp != NULL; lp = lp->next) if (lp->port == proto) { /*%< Host byte order. */ if (lp != *list) { lp->prev->next = lp->next; if (lp->next) lp->next->prev = lp->prev; (*list)->prev = lp; lp->next = *list; *list = lp; } prot.p_name = lp->name; prot.p_proto = lp->port; /*%< Host byte order. */ return (&prot); } return (0); } const char * res_protocolname(int num) { static char number[8]; struct protoent *pp; if (protolist == (struct valuelist *)0) res_buildprotolist(); pp = cgetprotobynumber(num); if (pp == 0) { (void) sprintf(number, "%d", num); return (number); } return (pp->p_name); } const char * res_servicename(u_int16_t port, const char *proto) { /*%< Host byte order. */ static char number[8]; struct servent *ss; if (servicelist == (struct valuelist *)0) res_buildservicelist(); ss = cgetservbyport(htons(port), proto); if (ss == 0) { (void) sprintf(number, "%d", port); return (number); } return (ss->s_name); } #endif Index: projects/clang370-import/lib/libc/resolv/res_send.c =================================================================== --- projects/clang370-import/lib/libc/resolv/res_send.c (revision 288125) +++ projects/clang370-import/lib/libc/resolv/res_send.c (revision 288126) @@ -1,1180 +1,1177 @@ /* * Portions Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC") * Portions Copyright (C) 1996-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THIS SOFTWARE. */ /* * Copyright (c) 1985, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Portions Copyright (c) 1993 by Digital Equipment Corporation. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies, and that * the name of Digital Equipment Corporation not be used in advertising or * publicity pertaining to distribution of the document or software without * specific, written prior permission. * * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS * SOFTWARE. */ #if defined(LIBC_SCCS) && !defined(lint) static const char sccsid[] = "@(#)res_send.c 8.1 (Berkeley) 6/4/93"; static const char rcsid[] = "$Id: res_send.c,v 1.22 2009/01/22 23:49:23 tbox Exp $"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); /*! \file * \brief * Send query to name server and wait for reply. */ #include "port_before.h" #if !defined(USE_KQUEUE) && !defined(USE_POLL) #include "fd_setsize.h" #endif #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "port_after.h" #ifdef USE_KQUEUE #include #else #ifdef USE_POLL #ifdef HAVE_STROPTS_H #include #endif #include #endif /* USE_POLL */ #endif #include "un-namespace.h" /* Options. Leave them on. */ #define DEBUG #include "res_debug.h" #include "res_private.h" #define EXT(res) ((res)->_u._ext) #if !defined(USE_POLL) && !defined(USE_KQUEUE) static const int highestFD = FD_SETSIZE - 1; #endif /* Forward. */ static int get_salen(const struct sockaddr *); static struct sockaddr * get_nsaddr(res_state, size_t); static int send_vc(res_state, const u_char *, int, u_char *, int, int *, int); static int send_dg(res_state, #ifdef USE_KQUEUE int kq, #endif const u_char *, int, u_char *, int, int *, int, int, int *, int *); static void Aerror(const res_state, FILE *, const char *, int, const struct sockaddr *, int); static void Perror(const res_state, FILE *, const char *, int); static int sock_eq(struct sockaddr *, struct sockaddr *); #if defined(NEED_PSELECT) && !defined(USE_POLL) && !defined(USE_KQUEUE) static int pselect(int, void *, void *, void *, struct timespec *, const sigset_t *); #endif void res_pquery(const res_state, const u_char *, int, FILE *); static const int niflags = NI_NUMERICHOST | NI_NUMERICSERV; /* Public. 
*/ /*% * looks up "ina" in _res.ns_addr_list[] * * returns: *\li 0 : not found *\li >0 : found * * author: *\li paul vixie, 29may94 */ int res_ourserver_p(const res_state statp, const struct sockaddr *sa) { const struct sockaddr_in *inp, *srv; const struct sockaddr_in6 *in6p, *srv6; int ns; switch (sa->sa_family) { case AF_INET: inp = (const struct sockaddr_in *)sa; for (ns = 0; ns < statp->nscount; ns++) { srv = (struct sockaddr_in *)get_nsaddr(statp, ns); if (srv->sin_family == inp->sin_family && srv->sin_port == inp->sin_port && (srv->sin_addr.s_addr == INADDR_ANY || srv->sin_addr.s_addr == inp->sin_addr.s_addr)) return (1); } break; case AF_INET6: if (EXT(statp).ext == NULL) break; in6p = (const struct sockaddr_in6 *)sa; for (ns = 0; ns < statp->nscount; ns++) { srv6 = (struct sockaddr_in6 *)get_nsaddr(statp, ns); if (srv6->sin6_family == in6p->sin6_family && srv6->sin6_port == in6p->sin6_port && #ifdef HAVE_SIN6_SCOPE_ID (srv6->sin6_scope_id == 0 || srv6->sin6_scope_id == in6p->sin6_scope_id) && #endif (IN6_IS_ADDR_UNSPECIFIED(&srv6->sin6_addr) || IN6_ARE_ADDR_EQUAL(&srv6->sin6_addr, &in6p->sin6_addr))) return (1); } break; default: break; } return (0); } /*% * look for (name,type,class) in the query section of packet (buf,eom) * * requires: *\li buf + HFIXEDSZ <= eom * * returns: *\li -1 : format error *\li 0 : not found *\li >0 : found * * author: *\li paul vixie, 29may94 */ int res_nameinquery(const char *name, int type, int class, const u_char *buf, const u_char *eom) { const u_char *cp = buf + HFIXEDSZ; int qdcount = ntohs(((const HEADER*)buf)->qdcount); while (qdcount-- > 0) { char tname[MAXDNAME+1]; int n, ttype, tclass; n = dn_expand(buf, eom, cp, tname, sizeof tname); if (n < 0) return (-1); cp += n; if (cp + 2 * INT16SZ > eom) return (-1); ttype = ns_get16(cp); cp += INT16SZ; tclass = ns_get16(cp); cp += INT16SZ; if (ttype == type && tclass == class && ns_samename(tname, name) == 1) return (1); } return (0); } /*% * is there a 1:1 mapping of (name,type,class) * in (buf1,eom1) and (buf2,eom2)? * * returns: *\li -1 : format error *\li 0 : not a 1:1 mapping *\li >0 : is a 1:1 mapping * * author: *\li paul vixie, 29may94 */ int res_queriesmatch(const u_char *buf1, const u_char *eom1, const u_char *buf2, const u_char *eom2) { const u_char *cp = buf1 + HFIXEDSZ; int qdcount = ntohs(((const HEADER*)buf1)->qdcount); if (buf1 + HFIXEDSZ > eom1 || buf2 + HFIXEDSZ > eom2) return (-1); /* * Only header section present in replies to * dynamic update packets. 
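
res_nameinquery() above walks the question section with dn_expand() plus two ns_get16() reads. A runnable sketch of the same walk over a hand-built query (dump_questions is an illustrative name; link with -lresolv where needed):

#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <arpa/nameser_compat.h>
#include <resolv.h>
#include <stdio.h>

/*
 * Print each (name, type, class) in the question section, the way
 * res_nameinquery() scans it.  Returns the count, or -1 on a
 * format error.
 */
static int
dump_questions(const u_char *buf, const u_char *eom)
{
	const u_char *cp = buf + HFIXEDSZ;
	int qdcount = ntohs(((const HEADER *)buf)->qdcount);
	int printed = 0;

	while (qdcount-- > 0) {
		char tname[MAXDNAME + 1];
		int n;

		/* Owner name, then 16-bit type and class. */
		n = dn_expand(buf, eom, cp, tname, sizeof(tname));
		if (n < 0)
			return (-1);
		cp += n;
		if (cp + 2 * INT16SZ > eom)
			return (-1);
		printf("%s type=%u class=%u\n", tname,
		    ns_get16(cp), ns_get16(cp + INT16SZ));
		cp += 2 * INT16SZ;
		printed++;
	}
	return (printed);
}

int
main(void)
{
	/* Hand-built query for "example.com" IN A, qdcount = 1. */
	static const u_char pkt[] = {
		0x12, 0x34, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00,
		7, 'e', 'x', 'a', 'm', 'p', 'l', 'e',
		3, 'c', 'o', 'm', 0,
		0x00, 0x01, 0x00, 0x01,
	};

	return (dump_questions(pkt, pkt + sizeof(pkt)) == 1 ? 0 : 1);
}
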
*/ if ((((const HEADER *)buf1)->opcode == ns_o_update) && (((const HEADER *)buf2)->opcode == ns_o_update)) return (1); if (qdcount != ntohs(((const HEADER*)buf2)->qdcount)) return (0); while (qdcount-- > 0) { char tname[MAXDNAME+1]; int n, ttype, tclass; n = dn_expand(buf1, eom1, cp, tname, sizeof tname); if (n < 0) return (-1); cp += n; if (cp + 2 * INT16SZ > eom1) return (-1); ttype = ns_get16(cp); cp += INT16SZ; tclass = ns_get16(cp); cp += INT16SZ; if (!res_nameinquery(tname, ttype, tclass, buf2, eom2)) return (0); } return (1); } int res_nsend(res_state statp, const u_char *buf, int buflen, u_char *ans, int anssiz) { int gotsomewhere, terrno, tries, v_circuit, resplen, ns, n; #ifdef USE_KQUEUE int kq; #endif char abuf[NI_MAXHOST]; /* No name servers or res_init() failure */ if (statp->nscount == 0 || EXT(statp).ext == NULL) { errno = ESRCH; return (-1); } if (anssiz < HFIXEDSZ) { errno = EINVAL; return (-1); } DprintQ((statp->options & RES_DEBUG) || (statp->pfcode & RES_PRF_QUERY), (stdout, ";; res_send()\n"), buf, buflen); v_circuit = (statp->options & RES_USEVC) || buflen > PACKETSZ; gotsomewhere = 0; terrno = ETIMEDOUT; #ifdef USE_KQUEUE if ((kq = kqueue()) < 0) { Perror(statp, stderr, "kqueue", errno); return (-1); } #endif /* * If the ns_addr_list in the resolver context has changed, then * invalidate our cached copy and the associated timing data. */ if (EXT(statp).nscount != 0) { int needclose = 0; struct sockaddr_storage peer; ISC_SOCKLEN_T peerlen; if (EXT(statp).nscount != statp->nscount) needclose++; else for (ns = 0; ns < statp->nscount; ns++) { if (statp->nsaddr_list[ns].sin_family && !sock_eq((struct sockaddr *)&statp->nsaddr_list[ns], (struct sockaddr *)&EXT(statp).ext->nsaddrs[ns])) { needclose++; break; } if (EXT(statp).nssocks[ns] == -1) continue; peerlen = sizeof(peer); if (_getpeername(EXT(statp).nssocks[ns], (struct sockaddr *)&peer, &peerlen) < 0) { needclose++; break; } if (!sock_eq((struct sockaddr *)&peer, get_nsaddr(statp, ns))) { needclose++; break; } } if (needclose) { res_nclose(statp); EXT(statp).nscount = 0; } } /* * Maybe initialize our private copy of the ns_addr_list. */ if (EXT(statp).nscount == 0) { for (ns = 0; ns < statp->nscount; ns++) { EXT(statp).nstimes[ns] = RES_MAXTIME; EXT(statp).nssocks[ns] = -1; if (!statp->nsaddr_list[ns].sin_family) continue; EXT(statp).ext->nsaddrs[ns].sin = statp->nsaddr_list[ns]; } EXT(statp).nscount = statp->nscount; } /* * Some resolvers want to even out the load on their nameservers. * Note that RES_BLAST overrides RES_ROTATE. */ if ((statp->options & RES_ROTATE) != 0U && (statp->options & RES_BLAST) == 0U) { union res_sockaddr_union inu; struct sockaddr_in ina; int lastns = statp->nscount - 1; int fd; u_int16_t nstime; if (EXT(statp).ext != NULL) inu = EXT(statp).ext->nsaddrs[0]; ina = statp->nsaddr_list[0]; fd = EXT(statp).nssocks[0]; nstime = EXT(statp).nstimes[0]; for (ns = 0; ns < lastns; ns++) { if (EXT(statp).ext != NULL) EXT(statp).ext->nsaddrs[ns] = EXT(statp).ext->nsaddrs[ns + 1]; statp->nsaddr_list[ns] = statp->nsaddr_list[ns + 1]; EXT(statp).nssocks[ns] = EXT(statp).nssocks[ns + 1]; EXT(statp).nstimes[ns] = EXT(statp).nstimes[ns + 1]; } if (EXT(statp).ext != NULL) EXT(statp).ext->nsaddrs[lastns] = inu; statp->nsaddr_list[lastns] = ina; EXT(statp).nssocks[lastns] = fd; EXT(statp).nstimes[lastns] = nstime; } /* * Send request, RETRY times, or until successful. 
*/ for (tries = 0; tries < statp->retry; tries++) { for (ns = 0; ns < statp->nscount; ns++) { struct sockaddr *nsap; int nsaplen; nsap = get_nsaddr(statp, ns); nsaplen = get_salen(nsap); statp->_flags &= ~RES_F_LASTMASK; statp->_flags |= (ns << RES_F_LASTSHIFT); same_ns: if (statp->qhook) { int done = 0, loops = 0; do { res_sendhookact act; act = (*statp->qhook)(&nsap, &buf, &buflen, ans, anssiz, &resplen); switch (act) { case res_goahead: done = 1; break; case res_nextns: res_nclose(statp); goto next_ns; case res_done: #ifdef USE_KQUEUE _close(kq); #endif return (resplen); case res_modified: /* give the hook another try */ if (++loops < 42) /*doug adams*/ break; /*FALLTHROUGH*/ case res_error: /*FALLTHROUGH*/ default: goto fail; } } while (!done); } Dprint(((statp->options & RES_DEBUG) && getnameinfo(nsap, nsaplen, abuf, sizeof(abuf), NULL, 0, niflags) == 0), (stdout, ";; Querying server (# %d) address = %s\n", ns + 1, abuf)); if (v_circuit) { /* Use VC; at most one attempt per server. */ tries = statp->retry; n = send_vc(statp, buf, buflen, ans, anssiz, &terrno, ns); if (n < 0) goto fail; if (n == 0) goto next_ns; resplen = n; } else { /* Use datagrams. */ n = send_dg(statp, #ifdef USE_KQUEUE kq, #endif buf, buflen, ans, anssiz, &terrno, ns, tries, &v_circuit, &gotsomewhere); if (n < 0) goto fail; if (n == 0) goto next_ns; if (v_circuit) goto same_ns; resplen = n; } Dprint((statp->options & RES_DEBUG) || ((statp->pfcode & RES_PRF_REPLY) && (statp->pfcode & RES_PRF_HEAD1)), (stdout, ";; got answer:\n")); DprintQ((statp->options & RES_DEBUG) || (statp->pfcode & RES_PRF_REPLY), (stdout, "%s", ""), ans, (resplen > anssiz) ? anssiz : resplen); /* * If we have temporarily opened a virtual circuit, * or if we haven't been asked to keep a socket open, * close the socket. */ if ((v_circuit && (statp->options & RES_USEVC) == 0U) || (statp->options & RES_STAYOPEN) == 0U) { res_nclose(statp); } if (statp->rhook) { int done = 0, loops = 0; do { res_sendhookact act; act = (*statp->rhook)(nsap, buf, buflen, ans, anssiz, &resplen); switch (act) { case res_goahead: case res_done: done = 1; break; case res_nextns: res_nclose(statp); goto next_ns; case res_modified: /* give the hook another try */ if (++loops < 42) /*doug adams*/ break; /*FALLTHROUGH*/ case res_error: /*FALLTHROUGH*/ default: goto fail; } } while (!done); } #ifdef USE_KQUEUE _close(kq); #endif return (resplen); next_ns: ; } /*foreach ns*/ } /*foreach retry*/ res_nclose(statp); #ifdef USE_KQUEUE _close(kq); #endif if (!v_circuit) { if (!gotsomewhere) errno = ECONNREFUSED; /*%< no nameservers found */ else errno = ETIMEDOUT; /*%< no answer obtained */ } else errno = terrno; return (-1); fail: res_nclose(statp); #ifdef USE_KQUEUE _close(kq); #endif return (-1); } /* Private */ static int -get_salen(sa) - const struct sockaddr *sa; +get_salen(const struct sockaddr *sa) { #ifdef HAVE_SA_LEN /* There are people do not set sa_len. Be forgiving to them. */ if (sa->sa_len) return (sa->sa_len); #endif if (sa->sa_family == AF_INET) return (sizeof(struct sockaddr_in)); else if (sa->sa_family == AF_INET6) return (sizeof(struct sockaddr_in6)); else return (0); /*%< unknown, die on connect */ } /*% * pick appropriate nsaddr_list for use. see res_init() for initialization. 
*/ static struct sockaddr * -get_nsaddr(statp, n) - res_state statp; - size_t n; +get_nsaddr(res_state statp, size_t n) { if (!statp->nsaddr_list[n].sin_family && EXT(statp).ext) { /* * - EXT(statp).ext->nsaddrs[n] holds an address that is larger * than struct sockaddr, and * - user code did not update statp->nsaddr_list[n]. */ return (struct sockaddr *)(void *)&EXT(statp).ext->nsaddrs[n]; } else { /* * - user code updated statp->nsaddr_list[n], or * - statp->nsaddr_list[n] has the same content as * EXT(statp).ext->nsaddrs[n]. */ return (struct sockaddr *)(void *)&statp->nsaddr_list[n]; } } static int send_vc(res_state statp, const u_char *buf, int buflen, u_char *ans, int anssiz, int *terrno, int ns) { const HEADER *hp = (const HEADER *) buf; HEADER *anhp = (HEADER *) ans; struct sockaddr *nsap; int nsaplen; int truncating, connreset, resplen, n; struct iovec iov[2]; u_short len; u_char *cp; void *tmp; #ifdef SO_NOSIGPIPE int on = 1; #endif nsap = get_nsaddr(statp, ns); nsaplen = get_salen(nsap); connreset = 0; same_ns: truncating = 0; /* Are we still talking to whom we want to talk to? */ if (statp->_vcsock >= 0 && (statp->_flags & RES_F_VC) != 0) { struct sockaddr_storage peer; ISC_SOCKLEN_T size = sizeof peer; if (_getpeername(statp->_vcsock, (struct sockaddr *)&peer, &size) < 0 || !sock_eq((struct sockaddr *)&peer, nsap)) { res_nclose(statp); statp->_flags &= ~RES_F_VC; } } if (statp->_vcsock < 0 || (statp->_flags & RES_F_VC) == 0) { if (statp->_vcsock >= 0) res_nclose(statp); statp->_vcsock = _socket(nsap->sa_family, SOCK_STREAM | SOCK_CLOEXEC, 0); #if !defined(USE_POLL) && !defined(USE_KQUEUE) if (statp->_vcsock > highestFD) { res_nclose(statp); errno = ENOTSOCK; } #endif if (statp->_vcsock < 0) { switch (errno) { case EPROTONOSUPPORT: #ifdef EPFNOSUPPORT case EPFNOSUPPORT: #endif case EAFNOSUPPORT: Perror(statp, stderr, "socket(vc)", errno); return (0); default: *terrno = errno; Perror(statp, stderr, "socket(vc)", errno); return (-1); } } #ifdef SO_NOSIGPIPE /* * Disable generation of SIGPIPE when writing to a closed * socket. Write should return -1 and set errno to EPIPE * instead. * * Push on even if setsockopt(SO_NOSIGPIPE) fails. */ (void)_setsockopt(statp->_vcsock, SOL_SOCKET, SO_NOSIGPIPE, &on, sizeof(on)); #endif errno = 0; if (_connect(statp->_vcsock, nsap, nsaplen) < 0) { *terrno = errno; Aerror(statp, stderr, "connect/vc", errno, nsap, nsaplen); res_nclose(statp); return (0); } statp->_flags |= RES_F_VC; } /* * Send length & message */ ns_put16((u_short)buflen, (u_char*)&len); iov[0] = evConsIovec(&len, INT16SZ); DE_CONST(buf, tmp); iov[1] = evConsIovec(tmp, buflen); if (_writev(statp->_vcsock, iov, 2) != (INT16SZ + buflen)) { *terrno = errno; Perror(statp, stderr, "write failed", errno); res_nclose(statp); return (0); } /* * Receive length & response */ read_len: cp = ans; len = INT16SZ; while ((n = _read(statp->_vcsock, (char *)cp, (int)len)) > 0) { cp += n; if ((len -= n) == 0) break; } if (n <= 0) { *terrno = errno; Perror(statp, stderr, "read failed", errno); res_nclose(statp); /* * A long running process might get its TCP * connection reset if the remote server was * restarted. Requery the server instead of * trying a new one. When there is only one * server, this means that a query might work * instead of failing. We only allow one reset * per query to prevent looping. 
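
send_vc() frames the query for TCP as a two-byte big-endian length followed by the message, pushed out with one writev(). The framing in isolation (write_tcp_query is an illustrative name; fd is assumed to be a connected stream socket):

#include <sys/types.h>
#include <sys/uio.h>

static int
write_tcp_query(int fd, const unsigned char *buf, size_t buflen)
{
	unsigned char len[2];
	struct iovec iov[2];

	len[0] = (buflen >> 8) & 0xff;	/* ns_put16() equivalent */
	len[1] = buflen & 0xff;
	iov[0].iov_base = len;
	iov[0].iov_len = sizeof(len);
	iov[1].iov_base = (void *)buf;	/* DE_CONST, as above */
	iov[1].iov_len = buflen;

	/* One writev() so prefix and message hit the wire together. */
	return (writev(fd, iov, 2) ==
	    (ssize_t)(sizeof(len) + buflen) ? 0 : -1);
}

The matching reader does the inverse, as the read_len loop above shows: read exactly two bytes, then exactly that many more.
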
*/ if (*terrno == ECONNRESET && !connreset) { connreset = 1; res_nclose(statp); goto same_ns; } res_nclose(statp); return (0); } resplen = ns_get16(ans); if (resplen > anssiz) { Dprint(statp->options & RES_DEBUG, (stdout, ";; response truncated\n") ); truncating = 1; len = anssiz; } else len = resplen; if (len < HFIXEDSZ) { /* * Undersized message. */ Dprint(statp->options & RES_DEBUG, (stdout, ";; undersized: %d\n", len)); *terrno = EMSGSIZE; res_nclose(statp); return (0); } cp = ans; while (len != 0 && (n = _read(statp->_vcsock, (char *)cp, (int)len)) > 0) { cp += n; len -= n; } if (n <= 0) { *terrno = errno; Perror(statp, stderr, "read(vc)", errno); res_nclose(statp); return (0); } if (truncating) { /* * Flush rest of answer so connection stays in synch. */ anhp->tc = 1; len = resplen - anssiz; while (len != 0) { char junk[PACKETSZ]; n = _read(statp->_vcsock, junk, (len > sizeof junk) ? sizeof junk : len); if (n > 0) len -= n; else break; } } /* * If the calling applicating has bailed out of * a previous call and failed to arrange to have * the circuit closed or the server has got * itself confused, then drop the packet and * wait for the correct one. */ if (hp->id != anhp->id) { DprintQ((statp->options & RES_DEBUG) || (statp->pfcode & RES_PRF_REPLY), (stdout, ";; old answer (unexpected):\n"), ans, (resplen > anssiz) ? anssiz: resplen); goto read_len; } /* * All is well, or the error is fatal. Signal that the * next nameserver ought not be tried. */ return (resplen); } static int send_dg(res_state statp, #ifdef USE_KQUEUE int kq, #endif const u_char *buf, int buflen, u_char *ans, int anssiz, int *terrno, int ns, int tries, int *v_circuit, int *gotsomewhere) { const HEADER *hp = (const HEADER *) buf; HEADER *anhp = (HEADER *) ans; const struct sockaddr *nsap; int nsaplen; struct timespec now, timeout, finish; struct sockaddr_storage from; ISC_SOCKLEN_T fromlen; int resplen, seconds, n, s; #ifdef USE_KQUEUE struct kevent kv; #else #ifdef USE_POLL int polltimeout; struct pollfd pollfd; #else fd_set dsmask; #endif #endif nsap = get_nsaddr(statp, ns); nsaplen = get_salen(nsap); if (EXT(statp).nssocks[ns] == -1) { EXT(statp).nssocks[ns] = _socket(nsap->sa_family, SOCK_DGRAM | SOCK_CLOEXEC, 0); #if !defined(USE_POLL) && !defined(USE_KQUEUE) if (EXT(statp).nssocks[ns] > highestFD) { res_nclose(statp); errno = ENOTSOCK; } #endif if (EXT(statp).nssocks[ns] < 0) { switch (errno) { case EPROTONOSUPPORT: #ifdef EPFNOSUPPORT case EPFNOSUPPORT: #endif case EAFNOSUPPORT: Perror(statp, stderr, "socket(dg)", errno); return (0); default: *terrno = errno; Perror(statp, stderr, "socket(dg)", errno); return (-1); } } #ifndef CANNOT_CONNECT_DGRAM /* * On a 4.3BSD+ machine (client and server, * actually), sending to a nameserver datagram * port with no nameserver will cause an * ICMP port unreachable message to be returned. * If our datagram socket is "connected" to the * server, we get an ECONNREFUSED error on the next * socket operation, and select returns if the * error message is received. We can thus detect * the absence of a nameserver without timing out. * * When the option "insecure1" is specified, we'd * rather expect to see responses from an "unknown" * address. In order to let the kernel accept such * responses, do not connect the socket here. * XXX: or do we need an explicit option to disable * connecting? 
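
The connected-datagram trick described in the comment above is easy to see in isolation: after connect(2) on a UDP socket, an ICMP port-unreachable surfaces as ECONNREFUSED on the next send or recv instead of a silent timeout. A hedged probe sketch (192.0.2.1 is a documentation address, and the payload is a junk byte, not a real query):

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_in sin;
	char reply[512];
	int s = socket(AF_INET, SOCK_DGRAM, 0);

	if (s < 0)
		return (1);
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(53);
	inet_pton(AF_INET, "192.0.2.1", &sin.sin_addr);

	/* Against a host with no listener, expect ECONNREFUSED here. */
	if (connect(s, (struct sockaddr *)&sin, sizeof(sin)) < 0 ||
	    send(s, "x", 1, 0) < 0 ||
	    recv(s, reply, sizeof(reply), 0) < 0)
		perror("udp probe");
	close(s);
	return (0);
}

This is also exactly why "insecure1" skips the connect: a connected socket would make the kernel drop replies from any address other than the one queried.
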
*/ if (!(statp->options & RES_INSECURE1) && _connect(EXT(statp).nssocks[ns], nsap, nsaplen) < 0) { Aerror(statp, stderr, "connect(dg)", errno, nsap, nsaplen); res_nclose(statp); return (0); } #endif /* !CANNOT_CONNECT_DGRAM */ Dprint(statp->options & RES_DEBUG, (stdout, ";; new DG socket\n")) } s = EXT(statp).nssocks[ns]; #ifndef CANNOT_CONNECT_DGRAM if (statp->options & RES_INSECURE1) { if (_sendto(s, (const char*)buf, buflen, 0, nsap, nsaplen) != buflen) { Aerror(statp, stderr, "sendto", errno, nsap, nsaplen); res_nclose(statp); return (0); } } else if (send(s, (const char*)buf, buflen, 0) != buflen) { Perror(statp, stderr, "send", errno); res_nclose(statp); return (0); } #else /* !CANNOT_CONNECT_DGRAM */ if (_sendto(s, (const char*)buf, buflen, 0, nsap, nsaplen) != buflen) { Aerror(statp, stderr, "sendto", errno, nsap, nsaplen); res_nclose(statp); return (0); } #endif /* !CANNOT_CONNECT_DGRAM */ /* * Wait for reply. */ seconds = (statp->retrans << tries); if (ns > 0) seconds /= statp->nscount; if (seconds <= 0) seconds = 1; now = evNowTime(); timeout = evConsTime(seconds, 0); finish = evAddTime(now, timeout); goto nonow; wait: now = evNowTime(); nonow: #ifndef USE_POLL if (evCmpTime(finish, now) > 0) timeout = evSubTime(finish, now); else timeout = evConsTime(0, 0); #ifdef USE_KQUEUE EV_SET(&kv, s, EVFILT_READ, EV_ADD | EV_ONESHOT, 0, 0, 0); n = _kevent(kq, &kv, 1, &kv, 1, &timeout); #else FD_ZERO(&dsmask); FD_SET(s, &dsmask); n = pselect(s + 1, &dsmask, NULL, NULL, &timeout, NULL); #endif #else timeout = evSubTime(finish, now); if (timeout.tv_sec < 0) timeout = evConsTime(0, 0); polltimeout = 1000*timeout.tv_sec + timeout.tv_nsec/1000000; pollfd.fd = s; pollfd.events = POLLRDNORM; n = _poll(&pollfd, 1, polltimeout); #endif /* USE_POLL */ if (n == 0) { Dprint(statp->options & RES_DEBUG, (stdout, ";; timeout\n")); *gotsomewhere = 1; return (0); } if (n < 0) { if (errno == EINTR) goto wait; #ifdef USE_KQUEUE Perror(statp, stderr, "kevent", errno); #else #ifndef USE_POLL Perror(statp, stderr, "select", errno); #else Perror(statp, stderr, "poll", errno); #endif /* USE_POLL */ #endif res_nclose(statp); return (0); } #ifdef USE_KQUEUE if (kv.ident != s) goto wait; #endif errno = 0; fromlen = sizeof(from); resplen = _recvfrom(s, (char*)ans, anssiz,0, (struct sockaddr *)&from, &fromlen); if (resplen <= 0) { Perror(statp, stderr, "recvfrom", errno); res_nclose(statp); return (0); } *gotsomewhere = 1; if (resplen < HFIXEDSZ) { /* * Undersized message. */ Dprint(statp->options & RES_DEBUG, (stdout, ";; undersized: %d\n", resplen)); *terrno = EMSGSIZE; res_nclose(statp); return (0); } if (hp->id != anhp->id) { /* * response from old query, ignore it. * XXX - potential security hazard could * be detected here. */ DprintQ((statp->options & RES_DEBUG) || (statp->pfcode & RES_PRF_REPLY), (stdout, ";; old answer:\n"), ans, (resplen > anssiz) ? anssiz : resplen); goto wait; } if (!(statp->options & RES_INSECURE1) && !res_ourserver_p(statp, (struct sockaddr *)&from)) { /* * response from wrong server? ignore it. * XXX - potential security hazard could * be detected here. */ DprintQ((statp->options & RES_DEBUG) || (statp->pfcode & RES_PRF_REPLY), (stdout, ";; not our server:\n"), ans, (resplen > anssiz) ? anssiz : resplen); goto wait; } #ifdef RES_USE_EDNS0 if (anhp->rcode == FORMERR && (statp->options & RES_USE_EDNS0) != 0U) { /* * Do not retry if the server do not understand EDNS0. * The case has to be captured here, as FORMERR packet do not * carry query section, hence res_queriesmatch() returns 0. 
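
The retransmit timeout computed above follows a simple rule: retrans is doubled each retry round, servers after the first share the budget, and one second is the floor. Extracted as a sketch (dg_timeout is an illustrative name; 5 and 2 are the stock RES_TIMEOUT and RES_DFLRETRY defaults):

#include <stdio.h>

static int
dg_timeout(int retrans, int tries, int ns, int nscount)
{
	int seconds = retrans << tries;	/* double per retry round */

	if (ns > 0)
		seconds /= nscount;	/* later servers share the budget */
	if (seconds <= 0)
		seconds = 1;		/* one-second floor */
	return (seconds);
}

int
main(void)
{
	int tries, ns;

	for (tries = 0; tries < 2; tries++)
		for (ns = 0; ns < 2; ns++)
			printf("try %d ns %d -> %ds\n",
			    tries, ns, dg_timeout(5, tries, ns, 2));
	return (0);
}
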
*/ DprintQ(statp->options & RES_DEBUG, (stdout, "server rejected query with EDNS0:\n"), ans, (resplen > anssiz) ? anssiz : resplen); /* record the error */ statp->_flags |= RES_F_EDNS0ERR; res_nclose(statp); return (0); } #endif if (!(statp->options & RES_INSECURE2) && !res_queriesmatch(buf, buf + buflen, ans, ans + anssiz)) { /* * response contains wrong query? ignore it. * XXX - potential security hazard could * be detected here. */ DprintQ((statp->options & RES_DEBUG) || (statp->pfcode & RES_PRF_REPLY), (stdout, ";; wrong query name:\n"), ans, (resplen > anssiz) ? anssiz : resplen); goto wait; } if (anhp->rcode == SERVFAIL || anhp->rcode == NOTIMP || anhp->rcode == REFUSED) { DprintQ(statp->options & RES_DEBUG, (stdout, "server rejected query:\n"), ans, (resplen > anssiz) ? anssiz : resplen); res_nclose(statp); /* don't retry if called from dig */ if (!statp->pfcode) return (0); } if (!(statp->options & RES_IGNTC) && anhp->tc) { /* * To get the rest of answer, * use TCP with same server. */ Dprint(statp->options & RES_DEBUG, (stdout, ";; truncated answer\n")); *v_circuit = 1; res_nclose(statp); return (1); } /* * All is well, or the error is fatal. Signal that the * next nameserver ought not be tried. */ return (resplen); } static void Aerror(const res_state statp, FILE *file, const char *string, int error, const struct sockaddr *address, int alen) { int save = errno; char hbuf[NI_MAXHOST]; char sbuf[NI_MAXSERV]; if ((statp->options & RES_DEBUG) != 0U) { if (getnameinfo(address, alen, hbuf, sizeof(hbuf), sbuf, sizeof(sbuf), niflags)) { strncpy(hbuf, "?", sizeof(hbuf) - 1); hbuf[sizeof(hbuf) - 1] = '\0'; strncpy(sbuf, "?", sizeof(sbuf) - 1); sbuf[sizeof(sbuf) - 1] = '\0'; } fprintf(file, "res_send: %s ([%s].%s): %s\n", string, hbuf, sbuf, strerror(error)); } errno = save; } static void Perror(const res_state statp, FILE *file, const char *string, int error) { int save = errno; if ((statp->options & RES_DEBUG) != 0U) fprintf(file, "res_send: %s: %s\n", string, strerror(error)); errno = save; } static int sock_eq(struct sockaddr *a, struct sockaddr *b) { struct sockaddr_in *a4, *b4; struct sockaddr_in6 *a6, *b6; if (a->sa_family != b->sa_family) return 0; switch (a->sa_family) { case AF_INET: a4 = (struct sockaddr_in *)a; b4 = (struct sockaddr_in *)b; return a4->sin_port == b4->sin_port && a4->sin_addr.s_addr == b4->sin_addr.s_addr; case AF_INET6: a6 = (struct sockaddr_in6 *)a; b6 = (struct sockaddr_in6 *)b; return a6->sin6_port == b6->sin6_port && #ifdef HAVE_SIN6_SCOPE_ID a6->sin6_scope_id == b6->sin6_scope_id && #endif IN6_ARE_ADDR_EQUAL(&a6->sin6_addr, &b6->sin6_addr); default: return 0; } } #if defined(NEED_PSELECT) && !defined(USE_POLL) && !defined(USE_KQUEUE) /* XXX needs to move to the porting library. 
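 * (Note: this userland emulation is inherently racy; a native
 * pselect(2) swaps the signal mask and waits atomically, while a
 * signal arriving between the sigprocmask() below and the select()
 * goes unnoticed until the timeout expires.)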
*/ static int pselect(int nfds, void *rfds, void *wfds, void *efds, struct timespec *tsp, const sigset_t *sigmask) { struct timeval tv, *tvp; sigset_t sigs; int n; if (tsp) { tvp = &tv; tv = evTimeVal(*tsp); } else tvp = NULL; if (sigmask) sigprocmask(SIG_SETMASK, sigmask, &sigs); n = select(nfds, rfds, wfds, efds, tvp); if (sigmask) sigprocmask(SIG_SETMASK, &sigs, NULL); if (tsp) *tsp = evTimeSpec(tv); return (n); } #endif Index: projects/clang370-import/lib/libc/rpc/auth_unix.c =================================================================== --- projects/clang370-import/lib/libc/rpc/auth_unix.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/auth_unix.c (revision 288126) @@ -1,382 +1,370 @@ /* $NetBSD: auth_unix.c,v 1.18 2000/07/06 03:03:30 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)auth_unix.c 1.19 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)auth_unix.c 2.2 88/08/01 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * auth_unix.c, Implements UNIX style authentication parameters. * * Copyright (C) 1984, Sun Microsystems, Inc. * * The system is very weak. The client uses no encryption for its * credentials and only sends null verifiers. The server sends back * null verifiers or optionally a verifier that suggests a new short hand * for the credentials. * */ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "mt_misc.h" /* auth_unix.c */ static void authunix_nextverf (AUTH *); static bool_t authunix_marshal (AUTH *, XDR *); static bool_t authunix_validate (AUTH *, struct opaque_auth *); static bool_t authunix_refresh (AUTH *, void *); static void authunix_destroy (AUTH *); static void marshal_new_auth (AUTH *); static struct auth_ops *authunix_ops (void); /* * This struct is pointed to by the ah_private field of an auth_handle.
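 *
 * (For orientation, a sketch of the AUTH_UNIX credential body that
 * au_origcred caches, following the classic flavor from RFC 1057,
 * section 9.2; names are the XDR description, not this file's:
 *
 *	unsigned int stamp;		arbitrary id (aup_time here)
 *	string machinename<255>;	caller's hostname
 *	unsigned int uid;
 *	unsigned int gid;
 *	unsigned int gids<NGRPS>;	supplementary groups
 *
 * xdr_authunix_parms() in authunix_prot.c serializes this layout.)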
*/ struct audata { struct opaque_auth au_origcred; /* original credentials */ struct opaque_auth au_shcred; /* short hand cred */ u_long au_shfaults; /* short hand cache faults */ char au_marshed[MAX_AUTH_BYTES]; u_int au_mpos; /* xdr pos at end of marshed */ }; #define AUTH_PRIVATE(auth) ((struct audata *)auth->ah_private) /* * Create a unix style authenticator. * Returns an auth handle with the given stuff in it. */ AUTH * -authunix_create(machname, uid, gid, len, aup_gids) - char *machname; - u_int uid; - u_int gid; - int len; - u_int *aup_gids; +authunix_create(char *machname, u_int uid, u_int gid, int len, u_int *aup_gids) { struct authunix_parms aup; char mymem[MAX_AUTH_BYTES]; struct timeval now; XDR xdrs; AUTH *auth; struct audata *au; /* * Allocate and set up auth handle */ au = NULL; auth = mem_alloc(sizeof(*auth)); #ifndef _KERNEL if (auth == NULL) { warnx("authunix_create: out of memory"); goto cleanup_authunix_create; } #endif au = mem_alloc(sizeof(*au)); #ifndef _KERNEL if (au == NULL) { warnx("authunix_create: out of memory"); goto cleanup_authunix_create; } #endif auth->ah_ops = authunix_ops(); auth->ah_private = (caddr_t)au; auth->ah_verf = au->au_shcred = _null_auth; au->au_shfaults = 0; au->au_origcred.oa_base = NULL; /* * fill in param struct from the given params */ (void)gettimeofday(&now, NULL); aup.aup_time = now.tv_sec; aup.aup_machname = machname; aup.aup_uid = uid; aup.aup_gid = gid; aup.aup_len = (u_int)len; aup.aup_gids = aup_gids; /* * Serialize the parameters into origcred */ xdrmem_create(&xdrs, mymem, MAX_AUTH_BYTES, XDR_ENCODE); if (! xdr_authunix_parms(&xdrs, &aup)) abort(); au->au_origcred.oa_length = len = XDR_GETPOS(&xdrs); au->au_origcred.oa_flavor = AUTH_UNIX; #ifdef _KERNEL au->au_origcred.oa_base = mem_alloc((u_int) len); #else if ((au->au_origcred.oa_base = mem_alloc((u_int) len)) == NULL) { warnx("authunix_create: out of memory"); goto cleanup_authunix_create; } #endif memmove(au->au_origcred.oa_base, mymem, (size_t)len); /* * set auth handle to reflect new cred. */ auth->ah_cred = au->au_origcred; marshal_new_auth(auth); return (auth); #ifndef _KERNEL cleanup_authunix_create: if (auth) mem_free(auth, sizeof(*auth)); if (au) { if (au->au_origcred.oa_base) mem_free(au->au_origcred.oa_base, (u_int)len); mem_free(au, sizeof(*au)); } return (NULL); #endif } /* * Returns an auth handle with parameters determined by doing lots of * syscalls. 
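 *
 * (Usage sketch: a caller that wants explicit control over the
 * credential fields can call authunix_create() directly,
 *
 *	AUTH *auth = authunix_create(hostname, uid, gid, ngids, gids);
 *	clnt->cl_auth = auth;
 *
 * while authunix_create_default() below derives the same values
 * from gethostname(), geteuid(), getegid() and getgroups().)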
*/ AUTH * -authunix_create_default() +authunix_create_default(void) { AUTH *auth; int ngids; long ngids_max; char machname[MAXHOSTNAMELEN + 1]; uid_t uid; gid_t gid; gid_t *gids; ngids_max = sysconf(_SC_NGROUPS_MAX) + 1; gids = malloc(sizeof(gid_t) * ngids_max); if (gids == NULL) return (NULL); if (gethostname(machname, sizeof machname) == -1) abort(); machname[sizeof(machname) - 1] = 0; uid = geteuid(); gid = getegid(); if ((ngids = getgroups(ngids_max, gids)) < 0) abort(); if (ngids > NGRPS) ngids = NGRPS; /* XXX: interface problem; we should translate from uid_t and gid_t */ auth = authunix_create(machname, uid, gid, ngids, gids); free(gids); return (auth); } /* * authunix operations */ /* ARGSUSED */ static void -authunix_nextverf(auth) - AUTH *auth; +authunix_nextverf(AUTH *auth) { /* no action necessary */ } static bool_t -authunix_marshal(auth, xdrs) - AUTH *auth; - XDR *xdrs; +authunix_marshal(AUTH *auth, XDR *xdrs) { struct audata *au; assert(auth != NULL); assert(xdrs != NULL); au = AUTH_PRIVATE(auth); return (XDR_PUTBYTES(xdrs, au->au_marshed, au->au_mpos)); } static bool_t -authunix_validate(auth, verf) - AUTH *auth; - struct opaque_auth *verf; +authunix_validate(AUTH *auth, struct opaque_auth *verf) { struct audata *au; XDR xdrs; assert(auth != NULL); assert(verf != NULL); if (verf->oa_flavor == AUTH_SHORT) { au = AUTH_PRIVATE(auth); xdrmem_create(&xdrs, verf->oa_base, verf->oa_length, XDR_DECODE); if (au->au_shcred.oa_base != NULL) { mem_free(au->au_shcred.oa_base, au->au_shcred.oa_length); au->au_shcred.oa_base = NULL; } if (xdr_opaque_auth(&xdrs, &au->au_shcred)) { auth->ah_cred = au->au_shcred; } else { xdrs.x_op = XDR_FREE; (void)xdr_opaque_auth(&xdrs, &au->au_shcred); au->au_shcred.oa_base = NULL; auth->ah_cred = au->au_origcred; } marshal_new_auth(auth); } return (TRUE); } static bool_t authunix_refresh(AUTH *auth, void *dummy) { struct audata *au = AUTH_PRIVATE(auth); struct authunix_parms aup; struct timeval now; XDR xdrs; int stat; assert(auth != NULL); if (auth->ah_cred.oa_base == au->au_origcred.oa_base) { /* there is no hope. Punt */ return (FALSE); } au->au_shfaults ++; /* first deserialize the creds back into a struct authunix_parms */ aup.aup_machname = NULL; aup.aup_gids = NULL; xdrmem_create(&xdrs, au->au_origcred.oa_base, au->au_origcred.oa_length, XDR_DECODE); stat = xdr_authunix_parms(&xdrs, &aup); if (! stat) goto done; /* update the time and serialize in place */ (void)gettimeofday(&now, NULL); aup.aup_time = now.tv_sec; xdrs.x_op = XDR_ENCODE; XDR_SETPOS(&xdrs, 0); stat = xdr_authunix_parms(&xdrs, &aup); if (! stat) goto done; auth->ah_cred = au->au_origcred; marshal_new_auth(auth); done: /* free the struct authunix_parms created by deserializing */ xdrs.x_op = XDR_FREE; (void)xdr_authunix_parms(&xdrs, &aup); XDR_DESTROY(&xdrs); return (stat); } static void -authunix_destroy(auth) - AUTH *auth; +authunix_destroy(AUTH *auth) { struct audata *au; assert(auth != NULL); au = AUTH_PRIVATE(auth); mem_free(au->au_origcred.oa_base, au->au_origcred.oa_length); if (au->au_shcred.oa_base != NULL) mem_free(au->au_shcred.oa_base, au->au_shcred.oa_length); mem_free(auth->ah_private, sizeof(struct audata)); if (auth->ah_verf.oa_base != NULL) mem_free(auth->ah_verf.oa_base, auth->ah_verf.oa_length); mem_free(auth, sizeof(*auth)); } /* * Marshals (pre-serializes) an auth struct. 
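 * It runs at create time and again from authunix_validate() and
 * authunix_refresh() whenever the credentials change, so that
 * authunix_marshal() can answer AUTH_MARSHALL with a single
 * XDR_PUTBYTES() of the cached bytes instead of re-encoding;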
* sets private data, au_marshed and au_mpos */ static void -marshal_new_auth(auth) - AUTH *auth; +marshal_new_auth(AUTH *auth) { XDR xdr_stream; XDR *xdrs = &xdr_stream; struct audata *au; assert(auth != NULL); au = AUTH_PRIVATE(auth); xdrmem_create(xdrs, au->au_marshed, MAX_AUTH_BYTES, XDR_ENCODE); if ((! xdr_opaque_auth(xdrs, &(auth->ah_cred))) || (! xdr_opaque_auth(xdrs, &(auth->ah_verf)))) warnx("auth_unix.c - Fatal marshalling problem"); else au->au_mpos = XDR_GETPOS(xdrs); XDR_DESTROY(xdrs); } static struct auth_ops * -authunix_ops() +authunix_ops(void) { static struct auth_ops ops; /* VARIABLES PROTECTED BY ops_lock: ops */ mutex_lock(&ops_lock); if (ops.ah_nextverf == NULL) { ops.ah_nextverf = authunix_nextverf; ops.ah_marshal = authunix_marshal; ops.ah_validate = authunix_validate; ops.ah_refresh = authunix_refresh; ops.ah_destroy = authunix_destroy; } mutex_unlock(&ops_lock); return (&ops); } Index: projects/clang370-import/lib/libc/rpc/authdes_prot.c =================================================================== --- projects/clang370-import/lib/libc/rpc/authdes_prot.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/authdes_prot.c (revision 288126) @@ -1,93 +1,89 @@ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)authdes_prot.c 2.1 88/07/29 4.0 RPCSRC; from 1.6 88/02/08 SMI"; #endif #include __FBSDID("$FreeBSD$"); /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1986-1991 by Sun Microsystems Inc.
*/ /* * authdes_prot.c, XDR routines for DES authentication */ #include "namespace.h" #include #include #include #include #include "un-namespace.h" #define ATTEMPT(xdr_op) if (!(xdr_op)) return (FALSE) bool_t -xdr_authdes_cred(xdrs, cred) - XDR *xdrs; - struct authdes_cred *cred; +xdr_authdes_cred(XDR *xdrs, struct authdes_cred *cred) { enum authdes_namekind *padc_namekind = &cred->adc_namekind; /* * Unrolled xdr */ ATTEMPT(xdr_enum(xdrs, (enum_t *) padc_namekind)); switch (cred->adc_namekind) { case ADN_FULLNAME: ATTEMPT(xdr_string(xdrs, &cred->adc_fullname.name, MAXNETNAMELEN)); ATTEMPT(xdr_opaque(xdrs, (caddr_t)&cred->adc_fullname.key, sizeof(des_block))); ATTEMPT(xdr_opaque(xdrs, (caddr_t)&cred->adc_fullname.window, sizeof(cred->adc_fullname.window))); return (TRUE); case ADN_NICKNAME: ATTEMPT(xdr_opaque(xdrs, (caddr_t)&cred->adc_nickname, sizeof(cred->adc_nickname))); return (TRUE); default: return (FALSE); } } bool_t -xdr_authdes_verf(xdrs, verf) - XDR *xdrs; - struct authdes_verf *verf; +xdr_authdes_verf(XDR *xdrs, struct authdes_verf *verf) { /* * Unrolled xdr */ ATTEMPT(xdr_opaque(xdrs, (caddr_t)&verf->adv_xtimestamp, sizeof(des_block))); ATTEMPT(xdr_opaque(xdrs, (caddr_t)&verf->adv_int_u, sizeof(verf->adv_int_u))); return (TRUE); } Index: projects/clang370-import/lib/libc/rpc/authunix_prot.c =================================================================== --- projects/clang370-import/lib/libc/rpc/authunix_prot.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/authunix_prot.c (revision 288126) @@ -1,78 +1,76 @@ /* $NetBSD: authunix_prot.c,v 1.12 2000/01/22 22:19:17 mycroft Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)authunix_prot.c 1.15 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)authunix_prot.c 2.1 88/07/29 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * authunix_prot.c * XDR for UNIX style authentication parameters for RPC * * Copyright (C) 1984, Sun Microsystems, Inc. 
*/ #include "namespace.h" #include #include #include #include #include #include "un-namespace.h" /* * XDR for unix authentication parameters. */ bool_t -xdr_authunix_parms(xdrs, p) - XDR *xdrs; - struct authunix_parms *p; +xdr_authunix_parms(XDR *xdrs, struct authunix_parms *p) { u_int **paup_gids; assert(xdrs != NULL); assert(p != NULL); paup_gids = &p->aup_gids; if (xdr_u_long(xdrs, &(p->aup_time)) && xdr_string(xdrs, &(p->aup_machname), MAX_MACHINE_NAME) && xdr_u_int(xdrs, &(p->aup_uid)) && xdr_u_int(xdrs, &(p->aup_gid)) && xdr_array(xdrs, (char **) paup_gids, &(p->aup_len), NGRPS, sizeof(u_int), (xdrproc_t)xdr_u_int) ) { return (TRUE); } return (FALSE); } Index: projects/clang370-import/lib/libc/rpc/bindresvport.c =================================================================== --- projects/clang370-import/lib/libc/rpc/bindresvport.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/bindresvport.c (revision 288126) @@ -1,160 +1,156 @@ /* $NetBSD: bindresvport.c,v 1.19 2000/07/06 03:03:59 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "from: @(#)bindresvport.c 1.8 88/02/08 SMI"; static char *sccsid = "from: @(#)bindresvport.c 2.2 88/07/29 4.0 RPCSRC"; #endif /* from: $OpenBSD: bindresvport.c,v 1.7 1996/07/30 16:25:47 downsj Exp $ */ #include __FBSDID("$FreeBSD$"); /* * Copyright (c) 1987 by Sun Microsystems, Inc. * * Portions Copyright(C) 1996, Jason Downs. All rights reserved. 
*/ #include "namespace.h" #include #include #include #include #include #include #include #include #include "un-namespace.h" /* * Bind a socket to a privileged IP port */ int -bindresvport(sd, sin) - int sd; - struct sockaddr_in *sin; +bindresvport(int sd, struct sockaddr_in *sin) { return bindresvport_sa(sd, (struct sockaddr *)sin); } /* * Bind a socket to a privileged IP port */ int -bindresvport_sa(sd, sa) - int sd; - struct sockaddr *sa; +bindresvport_sa(int sd, struct sockaddr *sa) { int old, error, af; struct sockaddr_storage myaddr; struct sockaddr_in *sin; #ifdef INET6 struct sockaddr_in6 *sin6; #endif int proto, portrange, portlow; u_int16_t *portp; socklen_t salen; if (sa == NULL) { salen = sizeof(myaddr); sa = (struct sockaddr *)&myaddr; if (_getsockname(sd, sa, &salen) == -1) return -1; /* errno is correctly set */ af = sa->sa_family; memset(sa, 0, salen); } else af = sa->sa_family; switch (af) { case AF_INET: proto = IPPROTO_IP; portrange = IP_PORTRANGE; portlow = IP_PORTRANGE_LOW; sin = (struct sockaddr_in *)sa; salen = sizeof(struct sockaddr_in); portp = &sin->sin_port; break; #ifdef INET6 case AF_INET6: proto = IPPROTO_IPV6; portrange = IPV6_PORTRANGE; portlow = IPV6_PORTRANGE_LOW; sin6 = (struct sockaddr_in6 *)sa; salen = sizeof(struct sockaddr_in6); portp = &sin6->sin6_port; break; #endif default: errno = EPFNOSUPPORT; return (-1); } sa->sa_family = af; sa->sa_len = salen; if (*portp == 0) { socklen_t oldlen = sizeof(old); error = _getsockopt(sd, proto, portrange, &old, &oldlen); if (error < 0) return (error); error = _setsockopt(sd, proto, portrange, &portlow, sizeof(portlow)); if (error < 0) return (error); } error = _bind(sd, sa, salen); if (*portp == 0) { int saved_errno = errno; if (error < 0) { if (_setsockopt(sd, proto, portrange, &old, sizeof(old)) < 0) errno = saved_errno; return (error); } if (sa != (struct sockaddr *)&myaddr) { /* Hmm, what did the kernel assign? */ if (_getsockname(sd, sa, &salen) < 0) errno = saved_errno; return (error); } } return (error); } Index: projects/clang370-import/lib/libc/rpc/clnt_bcast.c =================================================================== --- projects/clang370-import/lib/libc/rpc/clnt_bcast.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/clnt_bcast.c (revision 288126) @@ -1,672 +1,681 @@ /* $NetBSD: clnt_bcast.c,v 1.3 2000/07/06 03:05:20 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1986-1991 by Sun Microsystems Inc. */ #if defined(LIBC_SCCS) && !defined(lint) #ident "@(#)clnt_bcast.c 1.18 94/05/03 SMI" static char sccsid[] = "@(#)clnt_bcast.c 1.15 89/04/21 Copyr 1988 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); /* * clnt_bcast.c * Client interface to broadcast service. * * Copyright (C) 1988, Sun Microsystems, Inc. * * The following is kludged-up support for simple rpc broadcasts. * Someday a large, complicated system will replace these routines. */ #include "namespace.h" #include #include #include #include #include #include #include #include #ifdef PORTMAP #include #include #include #endif /* PORTMAP */ #include #include #ifdef RPC_DEBUG #include #endif #include #include #include #include #include #include #include "un-namespace.h" #include "rpc_com.h" #define MAXBCAST 20 /* Max no of broadcasting transports */ #define INITTIME 4000 /* Time to wait initially */ #define WAITTIME 8000 /* Maximum time to wait */ /* * If nettype is NULL, it broadcasts on all the available * datagram_n transports. May potentially lead to broadcast storms * and hence should be used with caution, care and courage. * * The current parameter xdr packet size is limited by the max tsdu * size of the transport. If the max tsdu size of any transport is * smaller than the parameter xdr packet, then broadcast is not * sent on that transport. * * Also, the packet size should be less than the packet size of * the data link layer (for ethernet it is 1400 bytes). There is * no easy way to find out the max size of the data link layer and * we are assuming that the args would be smaller than that. * * The result size has to be smaller than the transport tsdu size. * * If PORTMAP has been defined, we send two packets for UDP, one for * rpcbind and one for portmap. For those machines which support * both rpcbind and portmap, it will cause them to reply twice, and * also here it will get two responses ... inefficient and clumsy.
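 *
 * (Timing note, from the main loop below: the per-round poll
 * timeout starts at inittime and doubles while it remains at most
 * waittime; with the defaults INITTIME = 4000 and WAITTIME = 8000
 * that means two rebroadcast-and-listen rounds, of 4 and then 8
 * seconds.)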
*/ struct broadif { int index; struct sockaddr_storage broadaddr; TAILQ_ENTRY(broadif) link; }; typedef TAILQ_HEAD(, broadif) broadlist_t; int __rpc_getbroadifs(int, int, int, broadlist_t *); void __rpc_freebroadifs(broadlist_t *); int __rpc_broadenable(int, int, struct broadif *); int __rpc_lowvers = 0; int __rpc_getbroadifs(int af, int proto, int socktype, broadlist_t *list) { int count = 0; struct broadif *bip; struct ifaddrs *ifap, *ifp; #ifdef INET6 struct sockaddr_in6 *sin6; #endif struct sockaddr_in *sin; struct addrinfo hints, *res; if (getifaddrs(&ifp) < 0) return 0; memset(&hints, 0, sizeof hints); hints.ai_family = af; hints.ai_protocol = proto; hints.ai_socktype = socktype; if (getaddrinfo(NULL, "sunrpc", &hints, &res) != 0) { freeifaddrs(ifp); return 0; } for (ifap = ifp; ifap != NULL; ifap = ifap->ifa_next) { if (ifap->ifa_addr->sa_family != af || !(ifap->ifa_flags & IFF_UP)) continue; bip = (struct broadif *)malloc(sizeof *bip); if (bip == NULL) break; bip->index = if_nametoindex(ifap->ifa_name); if ( #ifdef INET6 af != AF_INET6 && #endif (ifap->ifa_flags & IFF_BROADCAST) && ifap->ifa_broadaddr) { memcpy(&bip->broadaddr, ifap->ifa_broadaddr, (size_t)ifap->ifa_broadaddr->sa_len); sin = (struct sockaddr_in *)(void *)&bip->broadaddr; sin->sin_port = ((struct sockaddr_in *) (void *)res->ai_addr)->sin_port; } else #ifdef INET6 if (af == AF_INET6 && (ifap->ifa_flags & IFF_MULTICAST)) { sin6 = (struct sockaddr_in6 *)(void *)&bip->broadaddr; inet_pton(af, RPCB_MULTICAST_ADDR, &sin6->sin6_addr); sin6->sin6_family = af; sin6->sin6_len = sizeof *sin6; sin6->sin6_port = ((struct sockaddr_in6 *) (void *)res->ai_addr)->sin6_port; sin6->sin6_scope_id = bip->index; } else #endif { free(bip); continue; } TAILQ_INSERT_TAIL(list, bip, link); count++; } freeifaddrs(ifp); freeaddrinfo(res); return count; } void __rpc_freebroadifs(broadlist_t *list) { struct broadif *bip, *next; bip = TAILQ_FIRST(list); while (bip != NULL) { next = TAILQ_NEXT(bip, link); free(bip); bip = next; } } int /*ARGSUSED*/ __rpc_broadenable(int af, int s, struct broadif *bip) { int o = 1; #if 0 if (af == AF_INET6) { fprintf(stderr, "set v6 multicast if to %d\n", bip->index); if (_setsockopt(s, IPPROTO_IPV6, IPV6_MULTICAST_IF, &bip->index, sizeof bip->index) < 0) return -1; } else #endif if (_setsockopt(s, SOL_SOCKET, SO_BROADCAST, &o, sizeof o) < 0) return -1; return 0; } - +/* + * rpc_broadcast_exp() + * + * prog - program number + * vers - version number + * proc - procedure number + * xargs - xdr routine for args + * argsp - pointer to args + * xresults - xdr routine for results + * resultsp - pointer to results + * eachresult - call with each result obtained + * inittime - how long to wait initially + * waittime - maximum time to wait + * nettype - transport type + */ enum clnt_stat -rpc_broadcast_exp(prog, vers, proc, xargs, argsp, xresults, resultsp, - eachresult, inittime, waittime, nettype) - rpcprog_t prog; /* program number */ - rpcvers_t vers; /* version number */ - rpcproc_t proc; /* procedure number */ - xdrproc_t xargs; /* xdr routine for args */ - caddr_t argsp; /* pointer to args */ - xdrproc_t xresults; /* xdr routine for results */ - caddr_t resultsp; /* pointer to results */ - resultproc_t eachresult; /* call with each result obtained */ - int inittime; /* how long to wait initially */ - int waittime; /* maximum time to wait */ - const char *nettype; /* transport type */ +rpc_broadcast_exp(rpcprog_t prog, rpcvers_t vers, rpcproc_t proc, + xdrproc_t xargs, caddr_t argsp, xdrproc_t xresults, caddr_t 
resultsp, + resultproc_t eachresult, int inittime, int waittime, + const char *nettype) { enum clnt_stat stat = RPC_SUCCESS; /* Return status */ XDR xdr_stream; /* XDR stream */ XDR *xdrs = &xdr_stream; struct rpc_msg msg; /* RPC message */ struct timeval t; char *outbuf = NULL; /* Broadcast msg buffer */ char *inbuf = NULL; /* Reply buf */ int inlen; u_int maxbufsize = 0; AUTH *sys_auth = authunix_create_default(); u_int i; void *handle; char uaddress[1024]; /* A self imposed limit */ char *uaddrp = uaddress; int pmap_reply_flag; /* reply recvd from PORTMAP */ /* An array of all the suitable broadcast transports */ struct { int fd; /* File descriptor */ int af; int proto; struct netconfig *nconf; /* Netconfig structure */ u_int asize; /* Size of the addr buf */ u_int dsize; /* Size of the data buf */ struct sockaddr_storage raddr; /* Remote address */ broadlist_t nal; } fdlist[MAXBCAST]; struct pollfd pfd[MAXBCAST]; size_t fdlistno = 0; struct r_rpcb_rmtcallargs barg; /* Remote arguments */ struct r_rpcb_rmtcallres bres; /* Remote results */ size_t outlen; struct netconfig *nconf; int msec; int pollretval; int fds_found; #ifdef PORTMAP size_t outlen_pmap = 0; u_long port; /* Remote port number */ int pmap_flag = 0; /* UDP exists ? */ char *outbuf_pmap = NULL; struct rmtcallargs barg_pmap; /* Remote arguments */ struct rmtcallres bres_pmap; /* Remote results */ u_int udpbufsz = 0; #endif /* PORTMAP */ if (sys_auth == NULL) { return (RPC_SYSTEMERROR); } /* * initialization: create a fd, a broadcast address, and send the * request on the broadcast transport. * Listen on all of them and on replies, call the user supplied * function. */ if (nettype == NULL) nettype = "datagram_n"; if ((handle = __rpc_setconf(nettype)) == NULL) { AUTH_DESTROY(sys_auth); return (RPC_UNKNOWNPROTO); } while ((nconf = __rpc_getconf(handle)) != NULL) { int fd; struct __rpc_sockinfo si; if (nconf->nc_semantics != NC_TPI_CLTS) continue; if (fdlistno >= MAXBCAST) break; /* No more slots available */ if (!__rpc_nconf2sockinfo(nconf, &si)) continue; TAILQ_INIT(&fdlist[fdlistno].nal); if (__rpc_getbroadifs(si.si_af, si.si_proto, si.si_socktype, &fdlist[fdlistno].nal) == 0) continue; fd = _socket(si.si_af, si.si_socktype, si.si_proto); if (fd < 0) { stat = RPC_CANTSEND; continue; } fdlist[fdlistno].af = si.si_af; fdlist[fdlistno].proto = si.si_proto; fdlist[fdlistno].fd = fd; fdlist[fdlistno].nconf = nconf; fdlist[fdlistno].asize = __rpc_get_a_size(si.si_af); pfd[fdlistno].events = POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND; pfd[fdlistno].fd = fdlist[fdlistno].fd = fd; fdlist[fdlistno].dsize = __rpc_get_t_size(si.si_af, si.si_proto, 0); if (maxbufsize <= fdlist[fdlistno].dsize) maxbufsize = fdlist[fdlistno].dsize; #ifdef PORTMAP if (si.si_af == AF_INET && si.si_proto == IPPROTO_UDP) { udpbufsz = fdlist[fdlistno].dsize; if ((outbuf_pmap = malloc(udpbufsz)) == NULL) { _close(fd); stat = RPC_SYSTEMERROR; goto done_broad; } pmap_flag = 1; } #endif /* PORTMAP */ fdlistno++; } if (fdlistno == 0) { if (stat == RPC_SUCCESS) stat = RPC_UNKNOWNPROTO; goto done_broad; } if (maxbufsize == 0) { if (stat == RPC_SUCCESS) stat = RPC_CANTSEND; goto done_broad; } inbuf = malloc(maxbufsize); outbuf = malloc(maxbufsize); if ((inbuf == NULL) || (outbuf == NULL)) { stat = RPC_SYSTEMERROR; goto done_broad; } /* Serialize all the arguments which have to be sent */ (void) gettimeofday(&t, NULL); msg.rm_xid = __RPC_GETXID(&t); msg.rm_direction = CALL; msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; msg.rm_call.cb_prog = RPCBPROG; msg.rm_call.cb_vers = 
RPCBVERS; msg.rm_call.cb_proc = RPCBPROC_CALLIT; barg.prog = prog; barg.vers = vers; barg.proc = proc; barg.args.args_val = argsp; barg.xdr_args = xargs; bres.addr = uaddrp; bres.results.results_val = resultsp; bres.xdr_res = xresults; msg.rm_call.cb_cred = sys_auth->ah_cred; msg.rm_call.cb_verf = sys_auth->ah_verf; xdrmem_create(xdrs, outbuf, maxbufsize, XDR_ENCODE); if ((!xdr_callmsg(xdrs, &msg)) || (!xdr_rpcb_rmtcallargs(xdrs, (struct rpcb_rmtcallargs *)(void *)&barg))) { stat = RPC_CANTENCODEARGS; goto done_broad; } outlen = xdr_getpos(xdrs); xdr_destroy(xdrs); #ifdef PORTMAP /* Prepare the packet for version 2 PORTMAP */ if (pmap_flag) { msg.rm_xid++; /* One way to distinguish */ msg.rm_call.cb_prog = PMAPPROG; msg.rm_call.cb_vers = PMAPVERS; msg.rm_call.cb_proc = PMAPPROC_CALLIT; barg_pmap.prog = prog; barg_pmap.vers = vers; barg_pmap.proc = proc; barg_pmap.args_ptr = argsp; barg_pmap.xdr_args = xargs; bres_pmap.port_ptr = &port; bres_pmap.xdr_results = xresults; bres_pmap.results_ptr = resultsp; xdrmem_create(xdrs, outbuf_pmap, udpbufsz, XDR_ENCODE); if ((! xdr_callmsg(xdrs, &msg)) || (! xdr_rmtcall_args(xdrs, &barg_pmap))) { stat = RPC_CANTENCODEARGS; goto done_broad; } outlen_pmap = xdr_getpos(xdrs); xdr_destroy(xdrs); } #endif /* PORTMAP */ /* * Basic loop: broadcast the packets to transports which * support data packets of size such that one can encode * all the arguments. * Wait a while for response(s). * The response timeout grows larger per iteration. */ for (msec = inittime; msec <= waittime; msec += msec) { struct broadif *bip; /* Broadcast all the packets now */ for (i = 0; i < fdlistno; i++) { if (fdlist[i].dsize < outlen) { stat = RPC_CANTSEND; continue; } for (bip = TAILQ_FIRST(&fdlist[i].nal); bip != NULL; bip = TAILQ_NEXT(bip, link)) { void *addr; addr = &bip->broadaddr; __rpc_broadenable(fdlist[i].af, fdlist[i].fd, bip); /* * Only use version 3 if lowvers is not set */ if (!__rpc_lowvers) if (_sendto(fdlist[i].fd, outbuf, outlen, 0, (struct sockaddr*)addr, (size_t)fdlist[i].asize) != outlen) { #ifdef RPC_DEBUG perror("sendto"); #endif warnx("clnt_bcast: cannot send " "broadcast packet"); stat = RPC_CANTSEND; continue; }; #ifdef RPC_DEBUG if (!__rpc_lowvers) fprintf(stderr, "Broadcast packet sent " "for %s\n", fdlist[i].nconf->nc_netid); #endif #ifdef PORTMAP /* * Send the version 2 packet also * for UDP/IP */ if (pmap_flag && fdlist[i].proto == IPPROTO_UDP) { if (_sendto(fdlist[i].fd, outbuf_pmap, outlen_pmap, 0, addr, (size_t)fdlist[i].asize) != outlen_pmap) { warnx("clnt_bcast: " "Cannot send broadcast packet"); stat = RPC_CANTSEND; continue; } } #ifdef RPC_DEBUG fprintf(stderr, "PMAP Broadcast packet " "sent for %s\n", fdlist[i].nconf->nc_netid); #endif #endif /* PORTMAP */ } /* End for sending all packets on this transport */ } /* End for sending on all transports */ if (eachresult == NULL) { stat = RPC_SUCCESS; goto done_broad; } /* * Get all the replies from these broadcast requests */ recv_again: switch (pollretval = _poll(pfd, fdlistno, msec)) { case 0: /* timed out */ stat = RPC_TIMEDOUT; continue; case -1: /* some kind of error - we ignore it */ goto recv_again; } /* end of poll results switch */ for (i = fds_found = 0; i < fdlistno && fds_found < pollretval; i++) { bool_t done = FALSE; if (pfd[i].revents == 0) continue; else if (pfd[i].revents & POLLNVAL) { /* * Something bad has happened to this descri- * ptor. We can cause _poll() to ignore * it simply by using a negative fd. We do that * rather than compacting the pfd[] and fdlist[] * arrays. 
*/ pfd[i].fd = -1; fds_found++; continue; } else fds_found++; #ifdef RPC_DEBUG fprintf(stderr, "response for %s\n", fdlist[i].nconf->nc_netid); #endif try_again: inlen = _recvfrom(fdlist[i].fd, inbuf, fdlist[i].dsize, 0, (struct sockaddr *)(void *)&fdlist[i].raddr, &fdlist[i].asize); if (inlen < 0) { if (errno == EINTR) goto try_again; warnx("clnt_bcast: Cannot receive reply to " "broadcast"); stat = RPC_CANTRECV; continue; } if (inlen < sizeof (u_int32_t)) continue; /* Drop that and go ahead */ /* * see if reply transaction id matches sent id. * If so, decode the results. If return id is xid + 1 * it was a PORTMAP reply */ if (*((u_int32_t *)(void *)(inbuf)) == *((u_int32_t *)(void *)(outbuf))) { pmap_reply_flag = 0; msg.acpted_rply.ar_verf = _null_auth; msg.acpted_rply.ar_results.where = (caddr_t)(void *)&bres; msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_rpcb_rmtcallres; #ifdef PORTMAP } else if (pmap_flag && *((u_int32_t *)(void *)(inbuf)) == *((u_int32_t *)(void *)(outbuf_pmap))) { pmap_reply_flag = 1; msg.acpted_rply.ar_verf = _null_auth; msg.acpted_rply.ar_results.where = (caddr_t)(void *)&bres_pmap; msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_rmtcallres; #endif /* PORTMAP */ } else continue; xdrmem_create(xdrs, inbuf, (u_int)inlen, XDR_DECODE); if (xdr_replymsg(xdrs, &msg)) { if ((msg.rm_reply.rp_stat == MSG_ACCEPTED) && (msg.acpted_rply.ar_stat == SUCCESS)) { struct netbuf taddr, *np; struct sockaddr_in *sin; #ifdef PORTMAP if (pmap_flag && pmap_reply_flag) { sin = (struct sockaddr_in *) (void *)&fdlist[i].raddr; sin->sin_port = htons((u_short)port); taddr.len = taddr.maxlen = fdlist[i].raddr.ss_len; taddr.buf = &fdlist[i].raddr; done = (*eachresult)(resultsp, &taddr, fdlist[i].nconf); } else { #endif /* PORTMAP */ #ifdef RPC_DEBUG fprintf(stderr, "uaddr %s\n", uaddrp); #endif np = uaddr2taddr( fdlist[i].nconf, uaddrp); done = (*eachresult)(resultsp, np, fdlist[i].nconf); free(np); #ifdef PORTMAP } #endif /* PORTMAP */ } /* otherwise, we just ignore the errors ... */ } /* else some kind of deserialization problem ... 
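 * (either way, the XDR_FREE pass just below re-walks the reply
 * with xdr_replymsg() and the caller's result routine, so any
 * memory a partial decode allocated is released, not leaked)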
*/ xdrs->x_op = XDR_FREE; msg.acpted_rply.ar_results.proc = (xdrproc_t) xdr_void; (void) xdr_replymsg(xdrs, &msg); (void) (*xresults)(xdrs, resultsp); XDR_DESTROY(xdrs); if (done) { stat = RPC_SUCCESS; goto done_broad; } else { goto recv_again; } } /* The recv for loop */ } /* The giant for loop */ done_broad: if (inbuf) (void) free(inbuf); if (outbuf) (void) free(outbuf); #ifdef PORTMAP if (outbuf_pmap) (void) free(outbuf_pmap); #endif /* PORTMAP */ for (i = 0; i < fdlistno; i++) { (void)_close(fdlist[i].fd); __rpc_freebroadifs(&fdlist[i].nal); } AUTH_DESTROY(sys_auth); (void) __rpc_endconf(handle); return (stat); } - +/* + * rpc_broadcast() + * + * prog - program number + * vers - version number + * proc - procedure number + * xargs - xdr routine for args + * argsp - pointer to args + * xresults - xdr routine for results + * resultsp - pointer to results + * eachresult - call with each result obtained + * nettype - transport type + */ enum clnt_stat -rpc_broadcast(prog, vers, proc, xargs, argsp, xresults, resultsp, - eachresult, nettype) - rpcprog_t prog; /* program number */ - rpcvers_t vers; /* version number */ - rpcproc_t proc; /* procedure number */ - xdrproc_t xargs; /* xdr routine for args */ - caddr_t argsp; /* pointer to args */ - xdrproc_t xresults; /* xdr routine for results */ - caddr_t resultsp; /* pointer to results */ - resultproc_t eachresult; /* call with each result obtained */ - const char *nettype; /* transport type */ +rpc_broadcast(rpcprog_t prog, rpcvers_t vers, rpcproc_t proc, xdrproc_t xargs, + caddr_t argsp, xdrproc_t xresults, caddr_t resultsp, + resultproc_t eachresult, const char *nettype) { enum clnt_stat dummy; dummy = rpc_broadcast_exp(prog, vers, proc, xargs, argsp, xresults, resultsp, eachresult, INITTIME, WAITTIME, nettype); return (dummy); } Index: projects/clang370-import/lib/libc/rpc/clnt_perror.c =================================================================== --- projects/clang370-import/lib/libc/rpc/clnt_perror.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/clnt_perror.c (revision 288126) @@ -1,332 +1,323 @@ /* $NetBSD: clnt_perror.c,v 1.24 2000/06/02 23:11:07 fvdl Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)clnt_perror.c 1.15 87/10/07 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)clnt_perror.c 2.1 88/07/29 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * clnt_perror.c * * Copyright (C) 1984, Sun Microsystems, Inc. * */ #include "namespace.h" #include #include #include #include #include #include #include #include #include "un-namespace.h" static char *buf; static char *_buf(void); static char *auth_errmsg(enum auth_stat); #define CLNT_PERROR_BUFLEN 256 static char * -_buf() +_buf(void) { if (buf == 0) buf = (char *)malloc(CLNT_PERROR_BUFLEN); return (buf); } /* * Print reply error info */ char * -clnt_sperror(rpch, s) - CLIENT *rpch; - const char *s; +clnt_sperror(CLIENT *rpch, const char *s) { struct rpc_err e; char *err; char *str; char *strstart; size_t len, i; assert(rpch != NULL); assert(s != NULL); str = _buf(); /* side effect: sets CLNT_PERROR_BUFLEN */ if (str == 0) return (0); len = CLNT_PERROR_BUFLEN; strstart = str; CLNT_GETERR(rpch, &e); if ((i = snprintf(str, len, "%s: ", s)) > 0) { str += i; len -= i; } (void)strncpy(str, clnt_sperrno(e.re_status), len - 1); i = strlen(str); str += i; len -= i; switch (e.re_status) { case RPC_SUCCESS: case RPC_CANTENCODEARGS: case RPC_CANTDECODERES: case RPC_TIMEDOUT: case RPC_PROGUNAVAIL: case RPC_PROCUNAVAIL: case RPC_CANTDECODEARGS: case RPC_SYSTEMERROR: case RPC_UNKNOWNHOST: case RPC_UNKNOWNPROTO: case RPC_PMAPFAILURE: case RPC_PROGNOTREGISTERED: case RPC_FAILED: break; case RPC_CANTSEND: case RPC_CANTRECV: i = snprintf(str, len, "; errno = %s", strerror(e.re_errno)); if (i > 0) { str += i; len -= i; } break; case RPC_VERSMISMATCH: i = snprintf(str, len, "; low version = %u, high version = %u", e.re_vers.low, e.re_vers.high); if (i > 0) { str += i; len -= i; } break; case RPC_AUTHERROR: err = auth_errmsg(e.re_why); i = snprintf(str, len, "; why = "); if (i > 0) { str += i; len -= i; } if (err != NULL) { i = snprintf(str, len, "%s",err); } else { i = snprintf(str, len, "(unknown authentication error - %d)", (int) e.re_why); } if (i > 0) { str += i; len -= i; } break; case RPC_PROGVERSMISMATCH: i = snprintf(str, len, "; low version = %u, high version = %u", e.re_vers.low, e.re_vers.high); if (i > 0) { str += i; len -= i; } break; default: /* unknown */ i = snprintf(str, len, "; s1 = %u, s2 = %u", e.re_lb.s1, e.re_lb.s2); if (i > 0) { str += i; len -= i; } break; } strstart[CLNT_PERROR_BUFLEN-1] = '\0'; return(strstart) ; } void -clnt_perror(rpch, s) - CLIENT *rpch; - const char *s; +clnt_perror(CLIENT *rpch, const char *s) { assert(rpch != NULL); assert(s != NULL); (void) fprintf(stderr, "%s\n", clnt_sperror(rpch,s)); } static const char *const rpc_errlist[] = { "RPC: Success", /* 0 - RPC_SUCCESS */ "RPC: Can't encode arguments", /* 1 - RPC_CANTENCODEARGS */ "RPC: Can't decode result", /* 2 - RPC_CANTDECODERES */ "RPC: Unable to send", /* 3 - RPC_CANTSEND */ "RPC: Unable to receive", /* 4 - RPC_CANTRECV */ "RPC: Timed out", /* 5 - RPC_TIMEDOUT 
*/ "RPC: Incompatible versions of RPC", /* 6 - RPC_VERSMISMATCH */ "RPC: Authentication error", /* 7 - RPC_AUTHERROR */ "RPC: Program unavailable", /* 8 - RPC_PROGUNAVAIL */ "RPC: Program/version mismatch", /* 9 - RPC_PROGVERSMISMATCH */ "RPC: Procedure unavailable", /* 10 - RPC_PROCUNAVAIL */ "RPC: Server can't decode arguments", /* 11 - RPC_CANTDECODEARGS */ "RPC: Remote system error", /* 12 - RPC_SYSTEMERROR */ "RPC: Unknown host", /* 13 - RPC_UNKNOWNHOST */ "RPC: Port mapper failure", /* 14 - RPC_PMAPFAILURE */ "RPC: Program not registered", /* 15 - RPC_PROGNOTREGISTERED */ "RPC: Failed (unspecified error)", /* 16 - RPC_FAILED */ "RPC: Unknown protocol" /* 17 - RPC_UNKNOWNPROTO */ }; /* * This interface for use by clntrpc */ char * -clnt_sperrno(stat) - enum clnt_stat stat; +clnt_sperrno(enum clnt_stat stat) { unsigned int errnum = stat; if (errnum < (sizeof(rpc_errlist)/sizeof(rpc_errlist[0]))) /* LINTED interface problem */ return (char *)rpc_errlist[errnum]; return ("RPC: (unknown error code)"); } void -clnt_perrno(num) - enum clnt_stat num; +clnt_perrno(enum clnt_stat num) { (void) fprintf(stderr, "%s\n", clnt_sperrno(num)); } char * -clnt_spcreateerror(s) - const char *s; +clnt_spcreateerror(const char *s) { char *str; size_t len, i; assert(s != NULL); str = _buf(); /* side effect: sets CLNT_PERROR_BUFLEN */ if (str == 0) return(0); len = CLNT_PERROR_BUFLEN; i = snprintf(str, len, "%s: ", s); if (i > 0) len -= i; (void)strncat(str, clnt_sperrno(rpc_createerr.cf_stat), len - 1); switch (rpc_createerr.cf_stat) { case RPC_PMAPFAILURE: (void) strncat(str, " - ", len - 1); (void) strncat(str, clnt_sperrno(rpc_createerr.cf_error.re_status), len - 4); break; case RPC_SYSTEMERROR: (void)strncat(str, " - ", len - 1); (void)strncat(str, strerror(rpc_createerr.cf_error.re_errno), len - 4); break; case RPC_CANTSEND: case RPC_CANTDECODERES: case RPC_CANTENCODEARGS: case RPC_SUCCESS: case RPC_UNKNOWNPROTO: case RPC_PROGNOTREGISTERED: case RPC_FAILED: case RPC_UNKNOWNHOST: case RPC_CANTDECODEARGS: case RPC_PROCUNAVAIL: case RPC_PROGVERSMISMATCH: case RPC_PROGUNAVAIL: case RPC_AUTHERROR: case RPC_VERSMISMATCH: case RPC_TIMEDOUT: case RPC_CANTRECV: default: break; } str[CLNT_PERROR_BUFLEN-1] = '\0'; return (str); } void -clnt_pcreateerror(s) - const char *s; +clnt_pcreateerror(const char *s) { assert(s != NULL); (void) fprintf(stderr, "%s\n", clnt_spcreateerror(s)); } static const char *const auth_errlist[] = { "Authentication OK", /* 0 - AUTH_OK */ "Invalid client credential", /* 1 - AUTH_BADCRED */ "Server rejected credential", /* 2 - AUTH_REJECTEDCRED */ "Invalid client verifier", /* 3 - AUTH_BADVERF */ "Server rejected verifier", /* 4 - AUTH_REJECTEDVERF */ "Client credential too weak", /* 5 - AUTH_TOOWEAK */ "Invalid server verifier", /* 6 - AUTH_INVALIDRESP */ "Failed (unspecified error)", /* 7 - AUTH_FAILED */ "Kerberos generic error", /* 8 - AUTH_KERB_GENERIC*/ "Kerberos credential expired", /* 9 - AUTH_TIMEEXPIRE */ "Bad kerberos ticket file", /* 10 - AUTH_TKT_FILE */ "Can't decode kerberos authenticator", /* 11 - AUTH_DECODE */ "Address wrong in kerberos ticket", /* 12 - AUTH_NET_ADDR */ "GSS-API crediential problem", /* 13 - RPCSEC_GSS_CREDPROBLEM */ "GSS-API context problem" /* 14 - RPCSEC_GSS_CTXPROBLEM */ }; static char * -auth_errmsg(stat) - enum auth_stat stat; +auth_errmsg(enum auth_stat stat) { unsigned int errnum = stat; if (errnum < (sizeof(auth_errlist)/sizeof(auth_errlist[0]))) /* LINTED interface problem */ return (char *)auth_errlist[errnum]; return(NULL); } Index: 
projects/clang370-import/lib/libc/rpc/clnt_raw.c =================================================================== --- projects/clang370-import/lib/libc/rpc/clnt_raw.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/clnt_raw.c (revision 288126) @@ -1,314 +1,296 @@ /* $NetBSD: clnt_raw.c,v 1.20 2000/12/10 04:12:03 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)clnt_raw.c 1.22 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)clnt_raw.c 2.2 88/08/01 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * clnt_raw.c * * Copyright (C) 1984, Sun Microsystems, Inc. * * Memory based rpc for simple testing and timing. * Interface to create an rpc client and server in the same process. * This lets us simulate rpc and get round trip overhead, without * any interference from the kernel. */ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include #include #include "un-namespace.h" #include "mt_misc.h" #define MCALL_MSG_SIZE 24 /* * This is the "network" we will be moving stuff over. */ static struct clntraw_private { CLIENT client_object; XDR xdr_stream; char *_raw_buf; union { struct rpc_msg mashl_rpcmsg; char mashl_callmsg[MCALL_MSG_SIZE]; } u; u_int mcnt; } *clntraw_private; static enum clnt_stat clnt_raw_call(CLIENT *, rpcproc_t, xdrproc_t, void *, xdrproc_t, void *, struct timeval); static void clnt_raw_geterr(CLIENT *, struct rpc_err *); static bool_t clnt_raw_freeres(CLIENT *, xdrproc_t, void *); static void clnt_raw_abort(CLIENT *); static bool_t clnt_raw_control(CLIENT *, u_int, void *); static void clnt_raw_destroy(CLIENT *); static struct clnt_ops *clnt_raw_ops(void); /* * Create a client handle for memory based rpc.
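 *
 * (Sketch of intended use, with hypothetical prog/vers/proc values;
 * the server side is registered in the same process first:
 *
 *	svc_reg(svc_raw_create(), prog, vers, dispatch, NULL);
 *	CLIENT *cl = clnt_raw_create(prog, vers);
 *	clnt_call(cl, proc, xdr_in, &in, xdr_out, &out, tv);
 *
 * everything then moves through the shared in-memory buffer, so a
 * timed loop of clnt_call() measures pure rpc overhead.)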
*/ CLIENT * -clnt_raw_create(prog, vers) - rpcprog_t prog; - rpcvers_t vers; +clnt_raw_create(rpcprog_t prog, rpcvers_t vers) { struct clntraw_private *clp; struct rpc_msg call_msg; XDR *xdrs; CLIENT *client; mutex_lock(&clntraw_lock); if ((clp = clntraw_private) == NULL) { clp = (struct clntraw_private *)calloc(1, sizeof (*clp)); if (clp == NULL) { mutex_unlock(&clntraw_lock); return NULL; } if (__rpc_rawcombuf == NULL) __rpc_rawcombuf = (char *)calloc(UDPMSGSIZE, sizeof (char)); clp->_raw_buf = __rpc_rawcombuf; clntraw_private = clp; } xdrs = &clp->xdr_stream; client = &clp->client_object; /* * pre-serialize the static part of the call msg and stash it away */ call_msg.rm_direction = CALL; call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; /* XXX: prog and vers have been long historically :-( */ call_msg.rm_call.cb_prog = (u_int32_t)prog; call_msg.rm_call.cb_vers = (u_int32_t)vers; xdrmem_create(xdrs, clp->u.mashl_callmsg, MCALL_MSG_SIZE, XDR_ENCODE); if (! xdr_callhdr(xdrs, &call_msg)) warnx("clntraw_create - Fatal header serialization error."); clp->mcnt = XDR_GETPOS(xdrs); XDR_DESTROY(xdrs); /* * Set xdrmem for client/server shared buffer */ xdrmem_create(xdrs, clp->_raw_buf, UDPMSGSIZE, XDR_FREE); /* * create client handle */ client->cl_ops = clnt_raw_ops(); client->cl_auth = authnone_create(); mutex_unlock(&clntraw_lock); return (client); } /* ARGSUSED */ static enum clnt_stat -clnt_raw_call(h, proc, xargs, argsp, xresults, resultsp, timeout) - CLIENT *h; - rpcproc_t proc; - xdrproc_t xargs; - void *argsp; - xdrproc_t xresults; - void *resultsp; - struct timeval timeout; +clnt_raw_call(CLIENT *h, rpcproc_t proc, xdrproc_t xargs, void *argsp, + xdrproc_t xresults, void *resultsp, struct timeval timeout) { struct clntraw_private *clp = clntraw_private; XDR *xdrs = &clp->xdr_stream; struct rpc_msg msg; enum clnt_stat status; struct rpc_err error; assert(h != NULL); mutex_lock(&clntraw_lock); if (clp == NULL) { mutex_unlock(&clntraw_lock); return (RPC_FAILED); } mutex_unlock(&clntraw_lock); call_again: /* * send request */ xdrs->x_op = XDR_ENCODE; XDR_SETPOS(xdrs, 0); clp->u.mashl_rpcmsg.rm_xid ++ ; if ((! XDR_PUTBYTES(xdrs, clp->u.mashl_callmsg, clp->mcnt)) || (! XDR_PUTINT32(xdrs, &proc)) || (! AUTH_MARSHALL(h->cl_auth, xdrs)) || (! (*xargs)(xdrs, argsp))) { return (RPC_CANTENCODEARGS); } (void)XDR_GETPOS(xdrs); /* called just to cause overhead */ /* * We have to call server input routine here because this is * all going on in one process. Yuk. */ svc_getreq_common(FD_SETSIZE); /* * get results */ xdrs->x_op = XDR_DECODE; XDR_SETPOS(xdrs, 0); msg.acpted_rply.ar_verf = _null_auth; msg.acpted_rply.ar_results.where = resultsp; msg.acpted_rply.ar_results.proc = xresults; if (! xdr_replymsg(xdrs, &msg)) { /* * It's possible for xdr_replymsg() to fail partway * through its attempt to decode the result from the * server. If this happens, it will leave the reply * structure partially populated with dynamically * allocated memory. (This can happen if someone uses * clntudp_bufcreate() to create a CLIENT handle and * specifies a receive buffer size that is too small.) * This memory must be free()ed to avoid a leak. */ int op = xdrs->x_op; xdrs->x_op = XDR_FREE; xdr_replymsg(xdrs, &msg); xdrs->x_op = op; return (RPC_CANTDECODERES); } _seterr_reply(&msg, &error); status = error.re_status; if (status == RPC_SUCCESS) { if (! 
AUTH_VALIDATE(h->cl_auth, &msg.acpted_rply.ar_verf)) { status = RPC_AUTHERROR; } } /* end successful completion */ else { if (AUTH_REFRESH(h->cl_auth, &msg)) goto call_again; } /* end of unsuccessful completion */ if (status == RPC_SUCCESS) { if (! AUTH_VALIDATE(h->cl_auth, &msg.acpted_rply.ar_verf)) { status = RPC_AUTHERROR; } if (msg.acpted_rply.ar_verf.oa_base != NULL) { xdrs->x_op = XDR_FREE; (void)xdr_opaque_auth(xdrs, &(msg.acpted_rply.ar_verf)); } } return (status); } /*ARGSUSED*/ static void -clnt_raw_geterr(cl, err) - CLIENT *cl; - struct rpc_err *err; +clnt_raw_geterr(CLIENT *cl, struct rpc_err *err) { } /* ARGSUSED */ static bool_t -clnt_raw_freeres(cl, xdr_res, res_ptr) - CLIENT *cl; - xdrproc_t xdr_res; - void *res_ptr; +clnt_raw_freeres(CLIENT *cl, xdrproc_t xdr_res, void *res_ptr) { struct clntraw_private *clp = clntraw_private; XDR *xdrs = &clp->xdr_stream; bool_t rval; mutex_lock(&clntraw_lock); if (clp == NULL) { rval = (bool_t) RPC_FAILED; mutex_unlock(&clntraw_lock); return (rval); } mutex_unlock(&clntraw_lock); xdrs->x_op = XDR_FREE; return ((*xdr_res)(xdrs, res_ptr)); } /*ARGSUSED*/ static void -clnt_raw_abort(cl) - CLIENT *cl; +clnt_raw_abort(CLIENT *cl) { } /*ARGSUSED*/ static bool_t -clnt_raw_control(cl, ui, str) - CLIENT *cl; - u_int ui; - void *str; +clnt_raw_control(CLIENT *cl, u_int ui, void *str) { return (FALSE); } /*ARGSUSED*/ static void -clnt_raw_destroy(cl) - CLIENT *cl; +clnt_raw_destroy(CLIENT *cl) { } static struct clnt_ops * -clnt_raw_ops() +clnt_raw_ops(void) { static struct clnt_ops ops; /* VARIABLES PROTECTED BY ops_lock: ops */ mutex_lock(&ops_lock); if (ops.cl_call == NULL) { ops.cl_call = clnt_raw_call; ops.cl_abort = clnt_raw_abort; ops.cl_geterr = clnt_raw_geterr; ops.cl_freeres = clnt_raw_freeres; ops.cl_destroy = clnt_raw_destroy; ops.cl_control = clnt_raw_control; } mutex_unlock(&ops_lock); return (&ops); } Index: projects/clang370-import/lib/libc/rpc/clnt_simple.c =================================================================== --- projects/clang370-import/lib/libc/rpc/clnt_simple.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/clnt_simple.c (revision 288126) @@ -1,205 +1,207 @@ /* $NetBSD: clnt_simple.c,v 1.21 2000/07/06 03:10:34 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1986-1991 by Sun Microsystems Inc. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "from: @(#)clnt_simple.c 1.35 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "from: @(#)clnt_simple.c 2.2 88/08/01 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * clnt_simple.c * Simplified front end to client rpc. * */ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "mt_misc.h" #ifndef MAXHOSTNAMELEN #define MAXHOSTNAMELEN 64 #endif #ifndef NETIDLEN #define NETIDLEN 32 #endif struct rpc_call_private { int valid; /* Is this entry valid ? */ CLIENT *client; /* Client handle */ pid_t pid; /* process-id at moment of creation */ rpcprog_t prognum; /* Program */ rpcvers_t versnum; /* Version */ char host[MAXHOSTNAMELEN]; /* Servers host */ char nettype[NETIDLEN]; /* Network type */ }; static struct rpc_call_private *rpc_call_private_main; static thread_key_t rpc_call_key; static once_t rpc_call_once = ONCE_INITIALIZER; static int rpc_call_key_error; static void rpc_call_key_init(void); static void rpc_call_destroy(void *); static void rpc_call_destroy(void *vp) { struct rpc_call_private *rcp = (struct rpc_call_private *)vp; if (rcp) { if (rcp->client) CLNT_DESTROY(rcp->client); free(rcp); } } static void rpc_call_key_init(void) { rpc_call_key_error = thr_keycreate(&rpc_call_key, rpc_call_destroy); } /* * This is the simplified interface to the client rpc layer. * The client handle is not destroyed here and is reused for * the future calls to same prog, vers, host and nettype combination. * * The total time available is 25 seconds. 
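 *
 * A hedged usage sketch (the host, numbers and transport below are
 * assumed example values, not taken from this file):
 *
 *	u_long in = 42, out = 0;
 *	enum clnt_stat st;
 *
 *	st = rpc_call("localhost", PROGNUM, VERSNUM, PROCNUM,
 *	    (xdrproc_t)xdr_u_long, (const char *)&in,
 *	    (xdrproc_t)xdr_u_long, (char *)&out, "udp");
 *	if (st != RPC_SUCCESS)
 *		clnt_perrno(st);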
+ * + * host - host name + * prognum - program number + * versnum - version number + * procnum - procedure number + * inproc, outproc - in/out XDR procedures + * in, out - recv/send data + * nettype - nettype */ enum clnt_stat -rpc_call(host, prognum, versnum, procnum, inproc, in, outproc, out, nettype) - const char *host; /* host name */ - rpcprog_t prognum; /* program number */ - rpcvers_t versnum; /* version number */ - rpcproc_t procnum; /* procedure number */ - xdrproc_t inproc, outproc; /* in/out XDR procedures */ - const char *in; - char *out; /* recv/send data */ - const char *nettype; /* nettype */ +rpc_call(const char *host, const rpcprog_t prognum, const rpcvers_t versnum, + const rpcproc_t procnum, const xdrproc_t inproc, const char *in, + const xdrproc_t outproc, char *out, const char *nettype) { struct rpc_call_private *rcp = (struct rpc_call_private *) 0; enum clnt_stat clnt_stat; struct timeval timeout, tottimeout; int main_thread = 1; if ((main_thread = thr_main())) { rcp = rpc_call_private_main; } else { if (thr_once(&rpc_call_once, rpc_call_key_init) != 0 || rpc_call_key_error != 0) { rpc_createerr.cf_stat = RPC_SYSTEMERROR; rpc_createerr.cf_error.re_errno = rpc_call_key_error; return (rpc_createerr.cf_stat); } rcp = (struct rpc_call_private *)thr_getspecific(rpc_call_key); } if (rcp == NULL) { rcp = malloc(sizeof (*rcp)); if (rcp == NULL) { rpc_createerr.cf_stat = RPC_SYSTEMERROR; rpc_createerr.cf_error.re_errno = errno; return (rpc_createerr.cf_stat); } if (main_thread) rpc_call_private_main = rcp; else thr_setspecific(rpc_call_key, (void *) rcp); rcp->valid = 0; rcp->client = NULL; } if ((nettype == NULL) || (nettype[0] == 0)) nettype = "netpath"; if (!(rcp->valid && rcp->pid == getpid() && (rcp->prognum == prognum) && (rcp->versnum == versnum) && (!strcmp(rcp->host, host)) && (!strcmp(rcp->nettype, nettype)))) { int fd; rcp->valid = 0; if (rcp->client) CLNT_DESTROY(rcp->client); /* * Using the first successful transport for that type */ rcp->client = clnt_create(host, prognum, versnum, nettype); rcp->pid = getpid(); if (rcp->client == NULL) { return (rpc_createerr.cf_stat); } /* * Set time outs for connectionless case. Do it * unconditionally. Faster than doing a t_getinfo() * and then doing the right thing. */ timeout.tv_usec = 0; timeout.tv_sec = 5; (void) CLNT_CONTROL(rcp->client, CLSET_RETRY_TIMEOUT, (char *)(void *)&timeout); if (CLNT_CONTROL(rcp->client, CLGET_FD, (char *)(void *)&fd)) _fcntl(fd, F_SETFD, 1); /* make it "close on exec" */ rcp->prognum = prognum; rcp->versnum = versnum; if ((strlen(host) < (size_t)MAXHOSTNAMELEN) && (strlen(nettype) < (size_t)NETIDLEN)) { (void) strcpy(rcp->host, host); (void) strcpy(rcp->nettype, nettype); rcp->valid = 1; } else { rcp->valid = 0; } } /* else reuse old client */ tottimeout.tv_sec = 25; tottimeout.tv_usec = 0; /*LINTED const castaway*/ clnt_stat = CLNT_CALL(rcp->client, procnum, inproc, (char *) in, outproc, out, tottimeout); /* * if call failed, empty cache */ if (clnt_stat != RPC_SUCCESS) rcp->valid = 0; return (clnt_stat); } Index: projects/clang370-import/lib/libc/rpc/clnt_vc.c =================================================================== --- projects/clang370-import/lib/libc/rpc/clnt_vc.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/clnt_vc.c (revision 288126) @@ -1,871 +1,844 @@ /* $NetBSD: clnt_vc.c,v 1.4 2000/07/14 08:40:42 fvdl Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)clnt_tcp.c 1.37 87/10/05 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)clnt_tcp.c 2.2 88/08/01 4.0 RPCSRC"; static char sccsid3[] = "@(#)clnt_vc.c 1.19 89/03/16 Copyr 1988 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); /* * clnt_tcp.c, Implements a TCP/IP based, client side RPC. * * Copyright (C) 1984, Sun Microsystems, Inc. * * TCP based RPC supports 'batched calls'. * A sequence of calls may be batched-up in a send buffer. The rpc calls * return immediately to the client even though the call was not necessarily * sent. The batching occurs if the results' xdr routine is NULL (0) AND * the rpc timeout value is zero (see clnt.h, rpc). * * Clients should NOT casually batch calls that in fact return results; that is, * the server side should be aware that a call is batched and not produce any * return message. Batched calls that produce many result messages can * deadlock (netlock) the client and the server.... * * Now go hang yourself. 
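 *
 * Concretely: a call is batched only when the caller passes a NULL
 * result xdr routine AND a zero timeout, e.g. (the handle and
 * procedure number below are assumed, not from this file):
 *
 *	static const struct timeval zero_tv = { 0, 0 };
 *	char *msgp = "one-way message";
 *
 *	(void) CLNT_CALL(clnt, LOGPROC, (xdrproc_t)xdr_wrapstring,
 *	    (char *)&msgp, (xdrproc_t)NULL, NULL, zero_tv);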
*/ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "rpc_com.h" #include "mt_misc.h" #define MCALL_MSG_SIZE 24 struct cmessage { struct cmsghdr cmsg; struct cmsgcred cmcred; }; static enum clnt_stat clnt_vc_call(CLIENT *, rpcproc_t, xdrproc_t, void *, xdrproc_t, void *, struct timeval); static void clnt_vc_geterr(CLIENT *, struct rpc_err *); static bool_t clnt_vc_freeres(CLIENT *, xdrproc_t, void *); static void clnt_vc_abort(CLIENT *); static bool_t clnt_vc_control(CLIENT *, u_int, void *); static void clnt_vc_destroy(CLIENT *); static struct clnt_ops *clnt_vc_ops(void); static bool_t time_not_ok(struct timeval *); static int read_vc(void *, void *, int); static int write_vc(void *, void *, int); static int __msgwrite(int, void *, size_t); static int __msgread(int, void *, size_t); struct ct_data { int ct_fd; /* connection's fd */ bool_t ct_closeit; /* close it on destroy */ struct timeval ct_wait; /* wait interval in milliseconds */ bool_t ct_waitset; /* wait set by clnt_control? */ struct netbuf ct_addr; /* remote addr */ struct rpc_err ct_error; union { char ct_mcallc[MCALL_MSG_SIZE]; /* marshalled callmsg */ u_int32_t ct_mcalli; } ct_u; u_int ct_mpos; /* pos after marshal */ XDR ct_xdrs; /* XDR stream */ }; /* * This machinery implements per-fd locks for MT-safety. It is not * sufficient to do per-CLIENT handle locks for MT-safety because a * user may create more than one CLIENT handle with the same fd behind * it. Therefore, we allocate an array of flags (vc_fd_locks), protected * by the clnt_fd_lock mutex, and an array (vc_cv) of condition variables * similarly protected. vc_fd_locks[fd] == 1 => a call is active on some * CLIENT handle created for that fd. * The current implementation holds locks across the entire RPC and reply. * Yes, this is silly, and as soon as this code is proven to work, this * should be the first thing fixed. One step at a time. */ static int *vc_fd_locks; static cond_t *vc_cv; #define release_fd_lock(fd, mask) { \ mutex_lock(&clnt_fd_lock); \ vc_fd_locks[fd] = 0; \ mutex_unlock(&clnt_fd_lock); \ thr_sigsetmask(SIG_SETMASK, &(mask), (sigset_t *) NULL); \ cond_signal(&vc_cv[fd]); \ } static const char clnt_vc_errstr[] = "%s : %s"; static const char clnt_vc_str[] = "clnt_vc_create"; static const char __no_mem_str[] = "out of memory"; /* * Create a client handle for a connection. * Default options are set, which the user can change using clnt_control()'s. * The rpc/vc package does buffering similar to stdio, so the client * must pick send and receive buffer sizes, 0 => use the default. * NB: fd is copied into a private area. * NB: The rpch->cl_auth is set null authentication. Caller may wish to * set this to something more useful. 
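 *
 * For example (a sketch; swapping in authunix_create_default() is one
 * common choice, not something this code requires):
 *
 *	cl = clnt_vc_create(fd, raddr, PROGNUM, VERSNUM, 0, 0);
 *	if (cl != NULL)
 *		cl->cl_auth = authunix_create_default();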
* * fd should be an open socket + * + * fd - open file descriptor + * raddr - servers address + * prog - program number + * vers - version number + * sendsz - buffer send size + * recvsz - buffer recv size */ CLIENT * -clnt_vc_create(fd, raddr, prog, vers, sendsz, recvsz) - int fd; /* open file descriptor */ - const struct netbuf *raddr; /* servers address */ - const rpcprog_t prog; /* program number */ - const rpcvers_t vers; /* version number */ - u_int sendsz; /* buffer recv size */ - u_int recvsz; /* buffer send size */ +clnt_vc_create(int fd, const struct netbuf *raddr, const rpcprog_t prog, + const rpcvers_t vers, u_int sendsz, u_int recvsz) { CLIENT *cl; /* client handle */ struct ct_data *ct = NULL; /* client handle */ struct timeval now; struct rpc_msg call_msg; static u_int32_t disrupt; sigset_t mask; sigset_t newmask; struct sockaddr_storage ss; socklen_t slen; struct __rpc_sockinfo si; if (disrupt == 0) disrupt = (u_int32_t)(long)raddr; cl = (CLIENT *)mem_alloc(sizeof (*cl)); ct = (struct ct_data *)mem_alloc(sizeof (*ct)); if ((cl == (CLIENT *)NULL) || (ct == (struct ct_data *)NULL)) { (void) syslog(LOG_ERR, clnt_vc_errstr, clnt_vc_str, __no_mem_str); rpc_createerr.cf_stat = RPC_SYSTEMERROR; rpc_createerr.cf_error.re_errno = errno; goto err; } ct->ct_addr.buf = NULL; sigfillset(&newmask); thr_sigsetmask(SIG_SETMASK, &newmask, &mask); mutex_lock(&clnt_fd_lock); if (vc_fd_locks == (int *) NULL) { int cv_allocsz, fd_allocsz; int dtbsize = __rpc_dtbsize(); fd_allocsz = dtbsize * sizeof (int); vc_fd_locks = (int *) mem_alloc(fd_allocsz); if (vc_fd_locks == (int *) NULL) { mutex_unlock(&clnt_fd_lock); thr_sigsetmask(SIG_SETMASK, &(mask), NULL); goto err; } else memset(vc_fd_locks, '\0', fd_allocsz); assert(vc_cv == (cond_t *) NULL); cv_allocsz = dtbsize * sizeof (cond_t); vc_cv = (cond_t *) mem_alloc(cv_allocsz); if (vc_cv == (cond_t *) NULL) { mem_free(vc_fd_locks, fd_allocsz); vc_fd_locks = (int *) NULL; mutex_unlock(&clnt_fd_lock); thr_sigsetmask(SIG_SETMASK, &(mask), NULL); goto err; } else { int i; for (i = 0; i < dtbsize; i++) cond_init(&vc_cv[i], 0, (void *) 0); } } else assert(vc_cv != (cond_t *) NULL); /* * XXX - fvdl connecting while holding a mutex? 
*/ slen = sizeof ss; if (_getpeername(fd, (struct sockaddr *)(void *)&ss, &slen) < 0) { if (errno != ENOTCONN) { rpc_createerr.cf_stat = RPC_SYSTEMERROR; rpc_createerr.cf_error.re_errno = errno; mutex_unlock(&clnt_fd_lock); thr_sigsetmask(SIG_SETMASK, &(mask), NULL); goto err; } if (_connect(fd, (struct sockaddr *)raddr->buf, raddr->len) < 0){ rpc_createerr.cf_stat = RPC_SYSTEMERROR; rpc_createerr.cf_error.re_errno = errno; mutex_unlock(&clnt_fd_lock); thr_sigsetmask(SIG_SETMASK, &(mask), NULL); goto err; } } mutex_unlock(&clnt_fd_lock); thr_sigsetmask(SIG_SETMASK, &(mask), NULL); if (!__rpc_fd2sockinfo(fd, &si)) goto err; ct->ct_closeit = FALSE; /* * Set up private data struct */ ct->ct_fd = fd; ct->ct_wait.tv_usec = 0; ct->ct_waitset = FALSE; ct->ct_addr.buf = malloc(raddr->maxlen); if (ct->ct_addr.buf == NULL) goto err; memcpy(ct->ct_addr.buf, raddr->buf, raddr->len); ct->ct_addr.len = raddr->len; ct->ct_addr.maxlen = raddr->maxlen; /* * Initialize call message */ (void)gettimeofday(&now, NULL); call_msg.rm_xid = ((u_int32_t)++disrupt) ^ __RPC_GETXID(&now); call_msg.rm_direction = CALL; call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION; call_msg.rm_call.cb_prog = (u_int32_t)prog; call_msg.rm_call.cb_vers = (u_int32_t)vers; /* * pre-serialize the static part of the call msg and stash it away */ xdrmem_create(&(ct->ct_xdrs), ct->ct_u.ct_mcallc, MCALL_MSG_SIZE, XDR_ENCODE); if (! xdr_callhdr(&(ct->ct_xdrs), &call_msg)) { if (ct->ct_closeit) { (void)_close(fd); } goto err; } ct->ct_mpos = XDR_GETPOS(&(ct->ct_xdrs)); XDR_DESTROY(&(ct->ct_xdrs)); assert(ct->ct_mpos + sizeof(uint32_t) <= MCALL_MSG_SIZE); /* * Create a client handle which uses xdrrec for serialization * and authnone for authentication. */ cl->cl_ops = clnt_vc_ops(); cl->cl_private = ct; cl->cl_auth = authnone_create(); sendsz = __rpc_get_t_size(si.si_af, si.si_proto, (int)sendsz); recvsz = __rpc_get_t_size(si.si_af, si.si_proto, (int)recvsz); xdrrec_create(&(ct->ct_xdrs), sendsz, recvsz, cl->cl_private, read_vc, write_vc); return (cl); err: if (ct) { if (ct->ct_addr.len) mem_free(ct->ct_addr.buf, ct->ct_addr.len); mem_free(ct, sizeof (struct ct_data)); } if (cl) mem_free(cl, sizeof (CLIENT)); return ((CLIENT *)NULL); } static enum clnt_stat -clnt_vc_call(cl, proc, xdr_args, args_ptr, xdr_results, results_ptr, timeout) - CLIENT *cl; - rpcproc_t proc; - xdrproc_t xdr_args; - void *args_ptr; - xdrproc_t xdr_results; - void *results_ptr; - struct timeval timeout; +clnt_vc_call(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, void *args_ptr, + xdrproc_t xdr_results, void *results_ptr, struct timeval timeout) { struct ct_data *ct = (struct ct_data *) cl->cl_private; XDR *xdrs = &(ct->ct_xdrs); struct rpc_msg reply_msg; u_int32_t x_id; u_int32_t *msg_x_id = &ct->ct_u.ct_mcalli; /* yuk */ bool_t shipnow; int refreshes = 2; sigset_t mask, newmask; int rpc_lock_value; bool_t reply_stat; assert(cl != NULL); sigfillset(&newmask); thr_sigsetmask(SIG_SETMASK, &newmask, &mask); mutex_lock(&clnt_fd_lock); while (vc_fd_locks[ct->ct_fd]) cond_wait(&vc_cv[ct->ct_fd], &clnt_fd_lock); if (__isthreaded) rpc_lock_value = 1; else rpc_lock_value = 0; vc_fd_locks[ct->ct_fd] = rpc_lock_value; mutex_unlock(&clnt_fd_lock); if (!ct->ct_waitset) { /* If time is not within limits, we ignore it. */ if (time_not_ok(&timeout) == FALSE) ct->ct_wait = timeout; } shipnow = (xdr_results == NULL && timeout.tv_sec == 0 && timeout.tv_usec == 0) ? 
FALSE : TRUE; call_again: xdrs->x_op = XDR_ENCODE; ct->ct_error.re_status = RPC_SUCCESS; x_id = ntohl(--(*msg_x_id)); if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) { if ((! XDR_PUTBYTES(xdrs, ct->ct_u.ct_mcallc, ct->ct_mpos)) || (! XDR_PUTINT32(xdrs, &proc)) || (! AUTH_MARSHALL(cl->cl_auth, xdrs)) || (! (*xdr_args)(xdrs, args_ptr))) { if (ct->ct_error.re_status == RPC_SUCCESS) ct->ct_error.re_status = RPC_CANTENCODEARGS; (void)xdrrec_endofrecord(xdrs, TRUE); release_fd_lock(ct->ct_fd, mask); return (ct->ct_error.re_status); } } else { *(uint32_t *) &ct->ct_u.ct_mcallc[ct->ct_mpos] = htonl(proc); if (! __rpc_gss_wrap(cl->cl_auth, ct->ct_u.ct_mcallc, ct->ct_mpos + sizeof(uint32_t), xdrs, xdr_args, args_ptr)) { if (ct->ct_error.re_status == RPC_SUCCESS) ct->ct_error.re_status = RPC_CANTENCODEARGS; (void)xdrrec_endofrecord(xdrs, TRUE); release_fd_lock(ct->ct_fd, mask); return (ct->ct_error.re_status); } } if (! xdrrec_endofrecord(xdrs, shipnow)) { release_fd_lock(ct->ct_fd, mask); return (ct->ct_error.re_status = RPC_CANTSEND); } if (! shipnow) { release_fd_lock(ct->ct_fd, mask); return (RPC_SUCCESS); } /* * Hack to provide rpc-based message passing */ if (timeout.tv_sec == 0 && timeout.tv_usec == 0) { release_fd_lock(ct->ct_fd, mask); return(ct->ct_error.re_status = RPC_TIMEDOUT); } /* * Keep receiving until we get a valid transaction id */ xdrs->x_op = XDR_DECODE; while (TRUE) { reply_msg.acpted_rply.ar_verf = _null_auth; reply_msg.acpted_rply.ar_results.where = NULL; reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void; if (! xdrrec_skiprecord(xdrs)) { release_fd_lock(ct->ct_fd, mask); return (ct->ct_error.re_status); } /* now decode and validate the response header */ if (! xdr_replymsg(xdrs, &reply_msg)) { if (ct->ct_error.re_status == RPC_SUCCESS) continue; release_fd_lock(ct->ct_fd, mask); return (ct->ct_error.re_status); } if (reply_msg.rm_xid == x_id) break; } /* * process header */ _seterr_reply(&reply_msg, &(ct->ct_error)); if (ct->ct_error.re_status == RPC_SUCCESS) { if (! AUTH_VALIDATE(cl->cl_auth, &reply_msg.acpted_rply.ar_verf)) { ct->ct_error.re_status = RPC_AUTHERROR; ct->ct_error.re_why = AUTH_INVALIDRESP; } else { if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) { reply_stat = (*xdr_results)(xdrs, results_ptr); } else { reply_stat = __rpc_gss_unwrap(cl->cl_auth, xdrs, xdr_results, results_ptr); } if (! reply_stat) { if (ct->ct_error.re_status == RPC_SUCCESS) ct->ct_error.re_status = RPC_CANTDECODERES; } } /* free verifier ... */ if (reply_msg.acpted_rply.ar_verf.oa_base != NULL) { xdrs->x_op = XDR_FREE; (void)xdr_opaque_auth(xdrs, &(reply_msg.acpted_rply.ar_verf)); } } /* end successful completion */ else { /* maybe our credentials need to be refreshed ... 
*/ if (refreshes-- && AUTH_REFRESH(cl->cl_auth, &reply_msg)) goto call_again; } /* end of unsuccessful completion */ release_fd_lock(ct->ct_fd, mask); return (ct->ct_error.re_status); } static void -clnt_vc_geterr(cl, errp) - CLIENT *cl; - struct rpc_err *errp; +clnt_vc_geterr(CLIENT *cl, struct rpc_err *errp) { struct ct_data *ct; assert(cl != NULL); assert(errp != NULL); ct = (struct ct_data *) cl->cl_private; *errp = ct->ct_error; } static bool_t -clnt_vc_freeres(cl, xdr_res, res_ptr) - CLIENT *cl; - xdrproc_t xdr_res; - void *res_ptr; +clnt_vc_freeres(CLIENT *cl, xdrproc_t xdr_res, void *res_ptr) { struct ct_data *ct; XDR *xdrs; bool_t dummy; sigset_t mask; sigset_t newmask; assert(cl != NULL); ct = (struct ct_data *)cl->cl_private; xdrs = &(ct->ct_xdrs); sigfillset(&newmask); thr_sigsetmask(SIG_SETMASK, &newmask, &mask); mutex_lock(&clnt_fd_lock); while (vc_fd_locks[ct->ct_fd]) cond_wait(&vc_cv[ct->ct_fd], &clnt_fd_lock); xdrs->x_op = XDR_FREE; dummy = (*xdr_res)(xdrs, res_ptr); mutex_unlock(&clnt_fd_lock); thr_sigsetmask(SIG_SETMASK, &(mask), NULL); cond_signal(&vc_cv[ct->ct_fd]); return dummy; } /*ARGSUSED*/ static void -clnt_vc_abort(cl) - CLIENT *cl; +clnt_vc_abort(CLIENT *cl) { } static bool_t -clnt_vc_control(cl, request, info) - CLIENT *cl; - u_int request; - void *info; +clnt_vc_control(CLIENT *cl, u_int request, void *info) { struct ct_data *ct; void *infop = info; sigset_t mask; sigset_t newmask; int rpc_lock_value; assert(cl != NULL); ct = (struct ct_data *)cl->cl_private; sigfillset(&newmask); thr_sigsetmask(SIG_SETMASK, &newmask, &mask); mutex_lock(&clnt_fd_lock); while (vc_fd_locks[ct->ct_fd]) cond_wait(&vc_cv[ct->ct_fd], &clnt_fd_lock); if (__isthreaded) rpc_lock_value = 1; else rpc_lock_value = 0; vc_fd_locks[ct->ct_fd] = rpc_lock_value; mutex_unlock(&clnt_fd_lock); switch (request) { case CLSET_FD_CLOSE: ct->ct_closeit = TRUE; release_fd_lock(ct->ct_fd, mask); return (TRUE); case CLSET_FD_NCLOSE: ct->ct_closeit = FALSE; release_fd_lock(ct->ct_fd, mask); return (TRUE); default: break; } /* for other requests which use info */ if (info == NULL) { release_fd_lock(ct->ct_fd, mask); return (FALSE); } switch (request) { case CLSET_TIMEOUT: if (time_not_ok((struct timeval *)info)) { release_fd_lock(ct->ct_fd, mask); return (FALSE); } ct->ct_wait = *(struct timeval *)infop; ct->ct_waitset = TRUE; break; case CLGET_TIMEOUT: *(struct timeval *)infop = ct->ct_wait; break; case CLGET_SERVER_ADDR: (void) memcpy(info, ct->ct_addr.buf, (size_t)ct->ct_addr.len); break; case CLGET_FD: *(int *)info = ct->ct_fd; break; case CLGET_SVC_ADDR: /* The caller should not free this memory area */ *(struct netbuf *)info = ct->ct_addr; break; case CLSET_SVC_ADDR: /* set to new address */ release_fd_lock(ct->ct_fd, mask); return (FALSE); case CLGET_XID: /* * use the knowledge that xid is the * first element in the call structure * This will get the xid of the PREVIOUS call */ *(u_int32_t *)info = ntohl(*(u_int32_t *)(void *)&ct->ct_u.ct_mcalli); break; case CLSET_XID: /* This will set the xid of the NEXT call */ *(u_int32_t *)(void *)&ct->ct_u.ct_mcalli = htonl(*((u_int32_t *)info) + 1); /* increment by 1 as clnt_vc_call() decrements once */ break; case CLGET_VERS: /* * This RELIES on the information that, in the call body, * the version number field is the fifth field from the * begining of the RPC header. 
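 * (The marshalled call header is laid out as xid, direction, rpc
 * version, program, version, procedure, which is why the version
 * number lives at offset 4 * BYTES_PER_XDR_UNIT below.)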
MUST be changed if the * call_struct is changed */ *(u_int32_t *)info = ntohl(*(u_int32_t *)(void *)(ct->ct_u.ct_mcallc + 4 * BYTES_PER_XDR_UNIT)); break; case CLSET_VERS: *(u_int32_t *)(void *)(ct->ct_u.ct_mcallc + 4 * BYTES_PER_XDR_UNIT) = htonl(*(u_int32_t *)info); break; case CLGET_PROG: /* * This RELIES on the information that, in the call body, * the program number field is the fourth field from the * begining of the RPC header. MUST be changed if the * call_struct is changed */ *(u_int32_t *)info = ntohl(*(u_int32_t *)(void *)(ct->ct_u.ct_mcallc + 3 * BYTES_PER_XDR_UNIT)); break; case CLSET_PROG: *(u_int32_t *)(void *)(ct->ct_u.ct_mcallc + 3 * BYTES_PER_XDR_UNIT) = htonl(*(u_int32_t *)info); break; default: release_fd_lock(ct->ct_fd, mask); return (FALSE); } release_fd_lock(ct->ct_fd, mask); return (TRUE); } static void -clnt_vc_destroy(cl) - CLIENT *cl; +clnt_vc_destroy(CLIENT *cl) { struct ct_data *ct = (struct ct_data *) cl->cl_private; int ct_fd = ct->ct_fd; sigset_t mask; sigset_t newmask; assert(cl != NULL); ct = (struct ct_data *) cl->cl_private; sigfillset(&newmask); thr_sigsetmask(SIG_SETMASK, &newmask, &mask); mutex_lock(&clnt_fd_lock); while (vc_fd_locks[ct_fd]) cond_wait(&vc_cv[ct_fd], &clnt_fd_lock); if (ct->ct_closeit && ct->ct_fd != -1) { (void)_close(ct->ct_fd); } XDR_DESTROY(&(ct->ct_xdrs)); if (ct->ct_addr.buf) free(ct->ct_addr.buf); mem_free(ct, sizeof(struct ct_data)); if (cl->cl_netid && cl->cl_netid[0]) mem_free(cl->cl_netid, strlen(cl->cl_netid) +1); if (cl->cl_tp && cl->cl_tp[0]) mem_free(cl->cl_tp, strlen(cl->cl_tp) +1); mem_free(cl, sizeof(CLIENT)); mutex_unlock(&clnt_fd_lock); thr_sigsetmask(SIG_SETMASK, &(mask), NULL); cond_signal(&vc_cv[ct_fd]); } /* * Interface between xdr serializer and tcp connection. * Behaves like the system calls, read & write, but keeps some error state * around for the rpc level. 
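 *
 * These routines are not called directly by consumers; they are the
 * read/write callbacks handed to xdrrec_create() in clnt_vc_create()
 * above, so every XDR record on this stream passes through them.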
*/ static int -read_vc(ctp, buf, len) - void *ctp; - void *buf; - int len; +read_vc(void *ctp, void *buf, int len) { struct sockaddr sa; socklen_t sal; struct ct_data *ct = (struct ct_data *)ctp; struct pollfd fd; int milliseconds = (int)((ct->ct_wait.tv_sec * 1000) + (ct->ct_wait.tv_usec / 1000)); if (len == 0) return (0); fd.fd = ct->ct_fd; fd.events = POLLIN; for (;;) { switch (_poll(&fd, 1, milliseconds)) { case 0: ct->ct_error.re_status = RPC_TIMEDOUT; return (-1); case -1: if (errno == EINTR) continue; ct->ct_error.re_status = RPC_CANTRECV; ct->ct_error.re_errno = errno; return (-1); } break; } sal = sizeof(sa); if ((_getpeername(ct->ct_fd, &sa, &sal) == 0) && (sa.sa_family == AF_LOCAL)) { len = __msgread(ct->ct_fd, buf, (size_t)len); } else { len = _read(ct->ct_fd, buf, (size_t)len); } switch (len) { case 0: /* premature eof */ ct->ct_error.re_errno = ECONNRESET; ct->ct_error.re_status = RPC_CANTRECV; len = -1; /* it's really an error */ break; case -1: ct->ct_error.re_errno = errno; ct->ct_error.re_status = RPC_CANTRECV; break; } return (len); } static int -write_vc(ctp, buf, len) - void *ctp; - void *buf; - int len; +write_vc(void *ctp, void *buf, int len) { struct sockaddr sa; socklen_t sal; struct ct_data *ct = (struct ct_data *)ctp; int i, cnt; sal = sizeof(sa); if ((_getpeername(ct->ct_fd, &sa, &sal) == 0) && (sa.sa_family == AF_LOCAL)) { for (cnt = len; cnt > 0; cnt -= i, buf = (char *)buf + i) { if ((i = __msgwrite(ct->ct_fd, buf, (size_t)cnt)) == -1) { ct->ct_error.re_errno = errno; ct->ct_error.re_status = RPC_CANTSEND; return (-1); } } } else { for (cnt = len; cnt > 0; cnt -= i, buf = (char *)buf + i) { if ((i = _write(ct->ct_fd, buf, (size_t)cnt)) == -1) { ct->ct_error.re_errno = errno; ct->ct_error.re_status = RPC_CANTSEND; return (-1); } } } return (len); } static struct clnt_ops * -clnt_vc_ops() +clnt_vc_ops(void) { static struct clnt_ops ops; sigset_t mask, newmask; /* VARIABLES PROTECTED BY ops_lock: ops */ sigfillset(&newmask); thr_sigsetmask(SIG_SETMASK, &newmask, &mask); mutex_lock(&ops_lock); if (ops.cl_call == NULL) { ops.cl_call = clnt_vc_call; ops.cl_abort = clnt_vc_abort; ops.cl_geterr = clnt_vc_geterr; ops.cl_freeres = clnt_vc_freeres; ops.cl_destroy = clnt_vc_destroy; ops.cl_control = clnt_vc_control; } mutex_unlock(&ops_lock); thr_sigsetmask(SIG_SETMASK, &(mask), NULL); return (&ops); } /* * Make sure that the time is not garbage. -1 value is disallowed. 
* Note this is different from time_not_ok in clnt_dg.c */ static bool_t -time_not_ok(t) - struct timeval *t; +time_not_ok(struct timeval *t) { return (t->tv_sec <= -1 || t->tv_sec > 100000000 || t->tv_usec <= -1 || t->tv_usec > 1000000); } static int -__msgread(sock, buf, cnt) - int sock; - void *buf; - size_t cnt; +__msgread(int sock, void *buf, size_t cnt) { struct iovec iov[1]; struct msghdr msg; union { struct cmsghdr cmsg; char control[CMSG_SPACE(sizeof(struct cmsgcred))]; } cm; bzero((char *)&cm, sizeof(cm)); iov[0].iov_base = buf; iov[0].iov_len = cnt; msg.msg_iov = iov; msg.msg_iovlen = 1; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = (caddr_t)&cm; msg.msg_controllen = CMSG_SPACE(sizeof(struct cmsgcred)); msg.msg_flags = 0; return(_recvmsg(sock, &msg, 0)); } static int -__msgwrite(sock, buf, cnt) - int sock; - void *buf; - size_t cnt; +__msgwrite(int sock, void *buf, size_t cnt) { struct iovec iov[1]; struct msghdr msg; union { struct cmsghdr cmsg; char control[CMSG_SPACE(sizeof(struct cmsgcred))]; } cm; bzero((char *)&cm, sizeof(cm)); iov[0].iov_base = buf; iov[0].iov_len = cnt; cm.cmsg.cmsg_type = SCM_CREDS; cm.cmsg.cmsg_level = SOL_SOCKET; cm.cmsg.cmsg_len = CMSG_LEN(sizeof(struct cmsgcred)); msg.msg_iov = iov; msg.msg_iovlen = 1; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_control = (caddr_t)&cm; msg.msg_controllen = CMSG_SPACE(sizeof(struct cmsgcred)); msg.msg_flags = 0; return(_sendmsg(sock, &msg, 0)); } Index: projects/clang370-import/lib/libc/rpc/des_crypt.c =================================================================== --- projects/clang370-import/lib/libc/rpc/des_crypt.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/des_crypt.c (revision 288126) @@ -1,140 +1,140 @@ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * des_crypt.c, DES encryption library routines * Copyright (C) 1986, Sun Microsystems, Inc. 
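 *
 * A hedged usage sketch (key, ivec and buffer contents are assumed
 * placeholders; a real key needs proper parity and secrecy):
 *
 *	char key[8], ivec[8], buf[64];
 *
 *	des_setparity(key);
 *	if (cbc_crypt(key, buf, sizeof(buf),
 *	    DES_ENCRYPT | DES_SW, ivec) != DESERR_NONE)
 *		errx(1, "cbc_crypt failed");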
*/ #include #include #include #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)des_crypt.c 2.2 88/08/10 4.0 RPCSRC; from 1.13 88/02/08 SMI"; #endif #include __FBSDID("$FreeBSD$"); static int common_crypt( char *, char *, unsigned, unsigned, struct desparams * ); -int (*__des_crypt_LOCAL)() = 0; +int (*__des_crypt_LOCAL)(char *, unsigned, struct desparams *) = 0; extern int _des_crypt_call(char *, int, struct desparams *); /* * Copy 8 bytes */ #define COPY8(src, dst) { \ char *a = (char *) dst; \ char *b = (char *) src; \ *a++ = *b++; *a++ = *b++; *a++ = *b++; *a++ = *b++; \ *a++ = *b++; *a++ = *b++; *a++ = *b++; *a++ = *b++; \ } /* * Copy multiple of 8 bytes */ #define DESCOPY(src, dst, len) { \ char *a = (char *) dst; \ char *b = (char *) src; \ int i; \ for (i = (int) len; i > 0; i -= 8) { \ *a++ = *b++; *a++ = *b++; *a++ = *b++; *a++ = *b++; \ *a++ = *b++; *a++ = *b++; *a++ = *b++; *a++ = *b++; \ } \ } /* * CBC mode encryption */ int cbc_crypt(char *key, char *buf, unsigned len, unsigned mode, char *ivec) { int err; struct desparams dp; #ifdef BROKEN_DES dp.UDES.UDES_buf = buf; dp.des_mode = ECB; #else dp.des_mode = CBC; #endif COPY8(ivec, dp.des_ivec); err = common_crypt(key, buf, len, mode, &dp); COPY8(dp.des_ivec, ivec); return(err); } /* * ECB mode encryption */ int ecb_crypt(char *key, char *buf, unsigned len, unsigned mode) { struct desparams dp; #ifdef BROKEN_DES dp.UDES.UDES_buf = buf; dp.des_mode = CBC; #else dp.des_mode = ECB; #endif return(common_crypt(key, buf, len, mode, &dp)); } /* * Common code to cbc_crypt() & ecb_crypt() */ static int common_crypt(char *key, char *buf, unsigned len, unsigned mode, struct desparams *desp) { int desdev; if ((len % 8) != 0 || len > DES_MAXDATA) { return(DESERR_BADPARAM); } desp->des_dir = ((mode & DES_DIRMASK) == DES_ENCRYPT) ? ENCRYPT : DECRYPT; desdev = mode & DES_DEVMASK; COPY8(key, desp->des_key); /* * software */ if (__des_crypt_LOCAL != NULL) { if (!__des_crypt_LOCAL(buf, len, desp)) { return (DESERR_HWERROR); } } else { if (!_des_crypt_call(buf, len, desp)) { return (DESERR_HWERROR); } } return(desdev == DES_SW ? DESERR_NONE : DESERR_NOHWDEVICE); } Index: projects/clang370-import/lib/libc/rpc/getnetconfig.c =================================================================== --- projects/clang370-import/lib/libc/rpc/getnetconfig.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/getnetconfig.c (revision 288126) @@ -1,740 +1,735 @@ /* $NetBSD: getnetconfig.c,v 1.3 2000/07/06 03:10:34 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)getnetconfig.c 1.12 91/12/19 SMI"; #endif #include __FBSDID("$FreeBSD$"); /* * Copyright (c) 1989 by Sun Microsystems, Inc. */ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "rpc_com.h" /* * The five library routines in this file provide application access to the * system network configuration database, /etc/netconfig. In addition to the * netconfig database and the routines for accessing it, the environment * variable NETPATH and its corresponding routines in getnetpath.c may also be * used to specify the network transport to be used. */ /* * netconfig errors */ #define NC_NONETCONFIG ENOENT #define NC_NOMEM ENOMEM #define NC_NOTINIT EINVAL /* setnetconfig was not called first */ #define NC_BADFILE EBADF /* format for netconfig file is bad */ #define NC_NOTFOUND ENOPROTOOPT /* specified netid was not found */ /* * semantics as strings (should be in netconfig.h) */ #define NC_TPI_CLTS_S "tpi_clts" #define NC_TPI_COTS_S "tpi_cots" #define NC_TPI_COTS_ORD_S "tpi_cots_ord" #define NC_TPI_RAW_S "tpi_raw" /* * flags as characters (also should be in netconfig.h) */ #define NC_NOFLAG_C '-' #define NC_VISIBLE_C 'v' #define NC_BROADCAST_C 'b' /* * Character used to indicate there is no name-to-address lookup library */ #define NC_NOLOOKUP "-" static const char * const _nc_errors[] = { "Netconfig database not found", "Not enough memory", "Not initialized", "Netconfig database has invalid format", "Netid not found in netconfig database" }; struct netconfig_info { int eof; /* all entries has been read */ int ref; /* # of times setnetconfig() has been called */ struct netconfig_list *head; /* head of the list */ struct netconfig_list *tail; /* last of the list */ }; struct netconfig_list { char *linep; /* hold line read from netconfig */ struct netconfig *ncp; struct netconfig_list *next; }; struct netconfig_vars { int valid; /* token that indicates a valid netconfig_vars */ int flag; /* first time flag */ struct netconfig_list *nc_configs; /* pointer to the current netconfig entry */ }; #define NC_VALID 0xfeed #define NC_STORAGE 0xf00d #define NC_INVALID 0 static int *__nc_error(void); static int parse_ncp(char *, struct netconfig *); static struct netconfig *dup_ncp(struct netconfig *); static FILE *nc_file; /* for netconfig db */ static mutex_t nc_file_lock = MUTEX_INITIALIZER; static struct netconfig_info ni = { 0, 0, NULL, NULL}; static mutex_t ni_lock = MUTEX_INITIALIZER; static thread_key_t nc_key; static once_t nc_once = ONCE_INITIALIZER; static int nc_key_error; static void nc_key_init(void) { nc_key_error = thr_keycreate(&nc_key, free); } #define MAXNETCONFIGLINE 1000 static int * -__nc_error() +__nc_error(void) { static int nc_error = 0; int *nc_addr; /* * Use the static `nc_error' if we are the main thread * (including non-threaded programs), or if an allocation * fails. 
*/ if (thr_main()) return (&nc_error); if (thr_once(&nc_once, nc_key_init) != 0 || nc_key_error != 0) return (&nc_error); if ((nc_addr = (int *)thr_getspecific(nc_key)) == NULL) { nc_addr = (int *)malloc(sizeof (int)); if (thr_setspecific(nc_key, (void *) nc_addr) != 0) { if (nc_addr) free(nc_addr); return (&nc_error); } *nc_addr = 0; } return (nc_addr); } #define nc_error (*(__nc_error())) /* * A call to setnetconfig() establishes a /etc/netconfig "session". A session * "handle" is returned on a successful call. At the start of a session (after * a call to setnetconfig()) searches through the /etc/netconfig database will * proceed from the start of the file. The session handle must be passed to * getnetconfig() to parse the file. Each call to getnetconfig() using the * current handle will process one subsequent entry in /etc/netconfig. * setnetconfig() must be called before the first call to getnetconfig(). * (Handles are used to allow for nested calls to setnetpath()). * * A new session is established with each call to setnetconfig(), with a new * handle being returned on each call. Previously established sessions remain * active until endnetconfig() is called with that session's handle as an * argument. * * setnetconfig() need *not* be called before a call to getnetconfigent(). * setnetconfig() returns a NULL pointer on failure (for example, if * the netconfig database is not present). */ void * -setnetconfig() +setnetconfig(void) { struct netconfig_vars *nc_vars; if ((nc_vars = (struct netconfig_vars *)malloc(sizeof (struct netconfig_vars))) == NULL) { return(NULL); } /* * For multiple calls, i.e. nc_file is not NULL, we just return the * handle without reopening the netconfig db. */ mutex_lock(&ni_lock); ni.ref++; mutex_unlock(&ni_lock); mutex_lock(&nc_file_lock); if ((nc_file != NULL) || (nc_file = fopen(NETCONFIG, "r")) != NULL) { nc_vars->valid = NC_VALID; nc_vars->flag = 0; nc_vars->nc_configs = ni.head; mutex_unlock(&nc_file_lock); return ((void *)nc_vars); } mutex_unlock(&nc_file_lock); mutex_lock(&ni_lock); ni.ref--; mutex_unlock(&ni_lock); nc_error = NC_NONETCONFIG; free(nc_vars); return (NULL); } /* * When first called, getnetconfig() returns a pointer to the first entry in * the netconfig database, formatted as a struct netconfig. On each subsequent * call, getnetconfig() returns a pointer to the next entry in the database. * getnetconfig() can thus be used to search the entire netconfig file. * getnetconfig() returns NULL at end of file. */ struct netconfig * -getnetconfig(handlep) -void *handlep; +getnetconfig(void *handlep) { struct netconfig_vars *ncp = (struct netconfig_vars *)handlep; char *stringp; /* tmp string pointer */ struct netconfig_list *list; struct netconfig *np; struct netconfig *result; /* * Verify that handle is valid */ mutex_lock(&nc_file_lock); if (ncp == NULL || nc_file == NULL) { nc_error = NC_NOTINIT; mutex_unlock(&nc_file_lock); return (NULL); } mutex_unlock(&nc_file_lock); switch (ncp->valid) { case NC_VALID: /* * If entry has already been read into the list, * we return the entry in the linked list. * If this is the first time call, check if there are any entries in * linked list. If no entries, we need to read the netconfig db. * If we have been here and the next entry is there, we just return * it. 
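 * Only when the cached list is exhausted (and eof has not yet been
 * seen) do we fall through and read one more line from the file
 * below.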
*/ if (ncp->flag == 0) { /* first time */ ncp->flag = 1; mutex_lock(&ni_lock); ncp->nc_configs = ni.head; mutex_unlock(&ni_lock); if (ncp->nc_configs != NULL) /* entry already exists */ return(ncp->nc_configs->ncp); } else if (ncp->nc_configs != NULL && ncp->nc_configs->next != NULL) { ncp->nc_configs = ncp->nc_configs->next; return(ncp->nc_configs->ncp); } /* * If we cannot find the entry in the list and we are at end of file, * we give up. */ mutex_lock(&ni_lock); if (ni.eof == 1) { mutex_unlock(&ni_lock); return(NULL); } mutex_unlock(&ni_lock); break; default: nc_error = NC_NOTINIT; return (NULL); } stringp = (char *) malloc(MAXNETCONFIGLINE); if (stringp == NULL) return (NULL); #ifdef MEM_CHK if (malloc_verify() == 0) { fprintf(stderr, "memory heap corrupted in getnetconfig\n"); exit(1); } #endif /* * Read a line from netconfig file. */ mutex_lock(&nc_file_lock); do { if (fgets(stringp, MAXNETCONFIGLINE, nc_file) == NULL) { free(stringp); mutex_lock(&ni_lock); ni.eof = 1; mutex_unlock(&ni_lock); mutex_unlock(&nc_file_lock); return (NULL); } } while (*stringp == '#'); mutex_unlock(&nc_file_lock); list = (struct netconfig_list *) malloc(sizeof (struct netconfig_list)); if (list == NULL) { free(stringp); return(NULL); } np = (struct netconfig *) malloc(sizeof (struct netconfig)); if (np == NULL) { free(stringp); free(list); return(NULL); } list->ncp = np; list->next = NULL; list->ncp->nc_lookups = NULL; list->linep = stringp; if (parse_ncp(stringp, list->ncp) == -1) { free(stringp); free(np); free(list); return (NULL); } else { /* * If this is the first entry that's been read, it is the head of * the list. If not, put the entry at the end of the list. * Reposition the current pointer of the handle to the last entry * in the list. */ mutex_lock(&ni_lock); if (ni.head == NULL) { /* first entry */ ni.head = ni.tail = list; } else { ni.tail->next = list; ni.tail = ni.tail->next; } ncp->nc_configs = ni.tail; result = ni.tail->ncp; mutex_unlock(&ni_lock); return(result); } } /* * endnetconfig() may be called to "unbind" or "close" the netconfig database * when processing is complete, releasing resources for reuse. endnetconfig() * may not be called before setnetconfig(). endnetconfig() returns 0 on * success and -1 on failure (for example, if setnetconfig() was not called * previously). */ int -endnetconfig(handlep) -void *handlep; +endnetconfig(void *handlep) { struct netconfig_vars *nc_handlep = (struct netconfig_vars *)handlep; struct netconfig_list *q, *p; /* * Verify that handle is valid */ if (nc_handlep == NULL || (nc_handlep->valid != NC_VALID && nc_handlep->valid != NC_STORAGE)) { nc_error = NC_NOTINIT; return (-1); } /* * Return 0 if anyone still needs it. */ nc_handlep->valid = NC_INVALID; nc_handlep->flag = 0; nc_handlep->nc_configs = NULL; mutex_lock(&ni_lock); if (--ni.ref > 0) { mutex_unlock(&ni_lock); free(nc_handlep); return(0); } /* * No one needs these entries anymore, so free them. * Make sure all info in netconfig_info structure has been reinitialized. */ q = ni.head; ni.eof = ni.ref = 0; ni.head = NULL; ni.tail = NULL; mutex_unlock(&ni_lock); while (q != NULL) { p = q->next; if (q->ncp->nc_lookups != NULL) free(q->ncp->nc_lookups); free(q->ncp); free(q->linep); free(q); q = p; } free(nc_handlep); mutex_lock(&nc_file_lock); fclose(nc_file); nc_file = NULL; mutex_unlock(&nc_file_lock); return (0); } /* * getnetconfigent(netid) returns a pointer to the struct netconfig structure * corresponding to netid. 
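 * The result is heap-allocated, so callers must release it with
 * freenetconfigent(); a minimal sketch ("udp" is an assumed netid):
 *
 *	struct netconfig *nconf = getnetconfigent("udp");
 *	if (nconf != NULL) {
 *		printf("%s\n", nconf->nc_device);
 *		freenetconfigent(nconf);
 *	}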
It returns NULL if netid is invalid (that is, does * not name an entry in the netconfig database). It returns NULL and sets * errno in case of failure (for example, if the netconfig database cannot be * opened). */ struct netconfig * -getnetconfigent(netid) - const char *netid; +getnetconfigent(const char *netid) { FILE *file; /* NETCONFIG db's file pointer */ char *linep; /* holds current netconfig line */ char *stringp; /* temporary string pointer */ struct netconfig *ncp = NULL; /* returned value */ struct netconfig_list *list; /* pointer to cache list */ nc_error = NC_NOTFOUND; /* default error. */ if (netid == NULL || strlen(netid) == 0) { return (NULL); } /* * Look up table if the entries have already been read and parsed in * getnetconfig(), then copy this entry into a buffer and return it. * If we cannot find the entry in the current list and there are more * entries in the netconfig db that has not been read, we then read the * db and try find the match netid. * If all the netconfig db has been read and placed into the list and * there is no match for the netid, return NULL. */ mutex_lock(&ni_lock); if (ni.head != NULL) { for (list = ni.head; list; list = list->next) { if (strcmp(list->ncp->nc_netid, netid) == 0) { mutex_unlock(&ni_lock); return(dup_ncp(list->ncp)); } } if (ni.eof == 1) { /* that's all the entries */ mutex_unlock(&ni_lock); return(NULL); } } mutex_unlock(&ni_lock); if ((file = fopen(NETCONFIG, "r")) == NULL) { nc_error = NC_NONETCONFIG; return (NULL); } if ((linep = malloc(MAXNETCONFIGLINE)) == NULL) { fclose(file); nc_error = NC_NOMEM; return (NULL); } do { ptrdiff_t len; char *tmpp; /* tmp string pointer */ do { if ((stringp = fgets(linep, MAXNETCONFIGLINE, file)) == NULL) { break; } } while (*stringp == '#'); if (stringp == NULL) { /* eof */ break; } if ((tmpp = strpbrk(stringp, "\t ")) == NULL) { /* can't parse file */ nc_error = NC_BADFILE; break; } if (strlen(netid) == (size_t) (len = tmpp - stringp) && /* a match */ strncmp(stringp, netid, (size_t)len) == 0) { if ((ncp = (struct netconfig *) malloc(sizeof (struct netconfig))) == NULL) { break; } ncp->nc_lookups = NULL; if (parse_ncp(linep, ncp) == -1) { free(ncp); ncp = NULL; } break; } } while (stringp != NULL); if (ncp == NULL) { free(linep); } fclose(file); return(ncp); } /* * freenetconfigent(netconfigp) frees the netconfig structure pointed to by * netconfigp (previously returned by getnetconfigent()). */ void -freenetconfigent(netconfigp) - struct netconfig *netconfigp; +freenetconfigent(struct netconfig *netconfigp) { if (netconfigp != NULL) { free(netconfigp->nc_netid); /* holds all netconfigp's strings */ if (netconfigp->nc_lookups != NULL) free(netconfigp->nc_lookups); free(netconfigp); } return; } /* * Parse line and stuff it in a struct netconfig * Typical line might look like: * udp tpi_cots vb inet udp /dev/udp /usr/lib/ip.so,/usr/local/ip.so * * We return -1 if any of the tokens don't parse, or malloc fails. * * Note that we modify stringp (putting NULLs after tokens) and * we set the ncp's string field pointers to point to these tokens within * stringp. 
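 *
 * For the sample line above, the fields parse out roughly as:
 *	nc_netid "udp", nc_semantics NC_TPI_COTS, nc_flag
 *	NC_VISIBLE | NC_BROADCAST, nc_protofmly "inet", nc_proto "udp",
 *	nc_device "/dev/udp", and two nc_lookups entries.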
+ * + * stringp - string to parse + * ncp - where to put results */ static int -parse_ncp(stringp, ncp) -char *stringp; /* string to parse */ -struct netconfig *ncp; /* where to put results */ +parse_ncp(char *stringp, struct netconfig *ncp) { char *tokenp; /* for processing tokens */ char *lasts; char **nc_lookups; nc_error = NC_BADFILE; /* nearly anything that breaks is for this reason */ stringp[strlen(stringp)-1] = '\0'; /* get rid of newline */ /* netid */ if ((ncp->nc_netid = strtok_r(stringp, "\t ", &lasts)) == NULL) { return (-1); } /* semantics */ if ((tokenp = strtok_r(NULL, "\t ", &lasts)) == NULL) { return (-1); } if (strcmp(tokenp, NC_TPI_COTS_ORD_S) == 0) ncp->nc_semantics = NC_TPI_COTS_ORD; else if (strcmp(tokenp, NC_TPI_COTS_S) == 0) ncp->nc_semantics = NC_TPI_COTS; else if (strcmp(tokenp, NC_TPI_CLTS_S) == 0) ncp->nc_semantics = NC_TPI_CLTS; else if (strcmp(tokenp, NC_TPI_RAW_S) == 0) ncp->nc_semantics = NC_TPI_RAW; else return (-1); /* flags */ if ((tokenp = strtok_r(NULL, "\t ", &lasts)) == NULL) { return (-1); } for (ncp->nc_flag = NC_NOFLAG; *tokenp != '\0'; tokenp++) { switch (*tokenp) { case NC_NOFLAG_C: break; case NC_VISIBLE_C: ncp->nc_flag |= NC_VISIBLE; break; case NC_BROADCAST_C: ncp->nc_flag |= NC_BROADCAST; break; default: return (-1); } } /* protocol family */ if ((ncp->nc_protofmly = strtok_r(NULL, "\t ", &lasts)) == NULL) { return (-1); } /* protocol name */ if ((ncp->nc_proto = strtok_r(NULL, "\t ", &lasts)) == NULL) { return (-1); } /* network device */ if ((ncp->nc_device = strtok_r(NULL, "\t ", &lasts)) == NULL) { return (-1); } if ((tokenp = strtok_r(NULL, "\t ", &lasts)) == NULL) { return (-1); } if (strcmp(tokenp, NC_NOLOOKUP) == 0) { ncp->nc_nlookups = 0; ncp->nc_lookups = NULL; } else { char *cp; /* tmp string */ if (ncp->nc_lookups != NULL) /* from last visit */ free(ncp->nc_lookups); ncp->nc_lookups = NULL; ncp->nc_nlookups = 0; while ((cp = tokenp) != NULL) { if ((nc_lookups = realloc(ncp->nc_lookups, (ncp->nc_nlookups + 1) * sizeof *ncp->nc_lookups)) == NULL) { free(ncp->nc_lookups); ncp->nc_lookups = NULL; return (-1); } tokenp = _get_next_token(cp, ','); ncp->nc_lookups = nc_lookups; ncp->nc_lookups[ncp->nc_nlookups++] = cp; } } return (0); } /* * Returns a string describing the reason for failure. */ char * -nc_sperror() +nc_sperror(void) { const char *message; switch(nc_error) { case NC_NONETCONFIG: message = _nc_errors[0]; break; case NC_NOMEM: message = _nc_errors[1]; break; case NC_NOTINIT: message = _nc_errors[2]; break; case NC_BADFILE: message = _nc_errors[3]; break; case NC_NOTFOUND: message = _nc_errors[4]; break; default: message = "Unknown network selection error"; } /* LINTED const castaway */ return ((char *)message); } /* * Prints a message onto standard error describing the reason for failure. */ void -nc_perror(s) - const char *s; +nc_perror(const char *s) { fprintf(stderr, "%s: %s\n", s, nc_sperror()); } /* * Duplicates the matched netconfig buffer. */ static struct netconfig * -dup_ncp(ncp) -struct netconfig *ncp; +dup_ncp(struct netconfig *ncp) { struct netconfig *p; char *tmp; u_int i; if ((tmp=malloc(MAXNETCONFIGLINE)) == NULL) return(NULL); if ((p=(struct netconfig *)malloc(sizeof(struct netconfig))) == NULL) { free(tmp); return(NULL); } /* * First we dup all the data from matched netconfig buffer. Then we * adjust some of the member pointer to a pre-allocated buffer where * contains part of the data. 
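 * (Concretely, the MAXNETCONFIGLINE scratch buffer is packed with
 * NUL-separated copies of netid, protofmly, proto, device and each
 * lookup path, in that order, and the struct pointers are re-aimed
 * at those copies.)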
* To follow the convention used in parse_ncp(), we store all the * necessary information in the pre-allocated buffer and let each * of the netconfig char pointer member point to the right address * in the buffer. */ *p = *ncp; p->nc_netid = (char *)strcpy(tmp,ncp->nc_netid); tmp = strchr(tmp, '\0') + 1; p->nc_protofmly = (char *)strcpy(tmp,ncp->nc_protofmly); tmp = strchr(tmp, '\0') + 1; p->nc_proto = (char *)strcpy(tmp,ncp->nc_proto); tmp = strchr(tmp, '\0') + 1; p->nc_device = (char *)strcpy(tmp,ncp->nc_device); p->nc_lookups = (char **)malloc((size_t)(p->nc_nlookups+1) * sizeof(char *)); if (p->nc_lookups == NULL) { free(p->nc_netid); free(p); return(NULL); } for (i=0; i < p->nc_nlookups; i++) { tmp = strchr(tmp, '\0') + 1; p->nc_lookups[i] = (char *)strcpy(tmp,ncp->nc_lookups[i]); } return(p); } Index: projects/clang370-import/lib/libc/rpc/getnetpath.c =================================================================== --- projects/clang370-import/lib/libc/rpc/getnetpath.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/getnetpath.c (revision 288126) @@ -1,273 +1,271 @@ /* $NetBSD: getnetpath.c,v 1.3 2000/07/06 03:10:34 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)getnetpath.c 1.11 91/12/19 SMI"; #endif #include __FBSDID("$FreeBSD$"); /* * Copyright (c) 1989 by Sun Microsystems, Inc. 
*/ #include "namespace.h" #include #include #include #include #include #include #include "un-namespace.h" /* * internal structure to keep track of a netpath "session" */ struct netpath_chain { struct netconfig *ncp; /* an nconf entry */ struct netpath_chain *nchain_next; /* next nconf entry allocated */ }; struct netpath_vars { int valid; /* token that indicates a valid netpath_vars */ void *nc_handlep; /* handle for current netconfig "session" */ char *netpath; /* pointer to current view-point in NETPATH */ char *netpath_start; /* pointer to start of our copy of NETPATH */ struct netpath_chain *ncp_list; /* list of nconfs allocated this session*/ }; #define NP_VALID 0xf00d #define NP_INVALID 0 char *_get_next_token(char *, int); /* * A call to setnetpath() establishes a NETPATH "session". setnetpath() * must be called before the first call to getnetpath(). A "handle" is * returned to distinguish the session; this handle should be passed * subsequently to getnetpath(). (Handles are used to allow for nested calls * to setnetpath()). * If setnetpath() is unable to establish a session (due to lack of memory * resources, or the absence of the /etc/netconfig file), a NULL pointer is * returned. */ void * -setnetpath() +setnetpath(void) { struct netpath_vars *np_sessionp; /* this session's variables */ char *npp; /* NETPATH env variable */ #ifdef MEM_CHK malloc_debug(1); #endif if ((np_sessionp = (struct netpath_vars *)malloc(sizeof (struct netpath_vars))) == NULL) { return (NULL); } if ((np_sessionp->nc_handlep = setnetconfig()) == NULL) { syslog (LOG_ERR, "rpc: failed to open " NETCONFIG); goto failed; } np_sessionp->valid = NP_VALID; np_sessionp->ncp_list = NULL; if ((npp = getenv(NETPATH)) == NULL) { np_sessionp->netpath = NULL; } else { (void) endnetconfig(np_sessionp->nc_handlep);/* won't need nc session*/ np_sessionp->nc_handlep = NULL; if ((np_sessionp->netpath = malloc(strlen(npp)+1)) == NULL) goto failed; else { (void) strcpy(np_sessionp->netpath, npp); } } np_sessionp->netpath_start = np_sessionp->netpath; return ((void *)np_sessionp); failed: free(np_sessionp); return (NULL); } /* * When first called, getnetpath() returns a pointer to the netconfig * database entry corresponding to the first valid NETPATH component. The * netconfig entry is formatted as a struct netconfig. * On each subsequent call, getnetpath returns a pointer to the netconfig * entry that corresponds to the next valid NETPATH component. getnetpath * can thus be used to search the netconfig database for all networks * included in the NETPATH variable. * When NETPATH has been exhausted, getnetpath() returns NULL. It returns * NULL and sets errno in case of an error (e.g., setnetpath was not called * previously). * getnetpath() silently ignores invalid NETPATH components. A NETPATH * component is invalid if there is no corresponding entry in the netconfig * database. * If the NETPATH variable is unset, getnetpath() behaves as if NETPATH * were set to the sequence of default or visible networks in the netconfig * database, in the order in which they are listed. */ struct netconfig * -getnetpath(handlep) - void *handlep; +getnetpath(void *handlep) { struct netpath_vars *np_sessionp = (struct netpath_vars *)handlep; struct netconfig *ncp = NULL; /* temp. holds a netconfig session */ struct netpath_chain *chainp; /* holds chain of ncp's we alloc */ char *npp; /* holds current NETPATH */ if (np_sessionp == NULL || np_sessionp->valid != NP_VALID) { errno = EINVAL; return (NULL); } if (np_sessionp->netpath_start == NULL) { /* NETPATH was not set */ do { /* select next visible network */ if (np_sessionp->nc_handlep == NULL) { np_sessionp->nc_handlep = setnetconfig(); if (np_sessionp->nc_handlep == NULL) syslog (LOG_ERR, "rpc: failed to open " NETCONFIG); } if ((ncp = getnetconfig(np_sessionp->nc_handlep)) == NULL) { return(NULL); } } while ((ncp->nc_flag & NC_VISIBLE) == 0); return (ncp); } /* * Find first valid network ID in netpath. */ while ((npp = np_sessionp->netpath) != NULL && strlen(npp) != 0) { np_sessionp->netpath = _get_next_token(npp, ':'); /* * npp is a network identifier. */ if ((ncp = getnetconfigent(npp)) != NULL) { chainp = (struct netpath_chain *) /* cobble alloc chain entry */ malloc(sizeof (struct netpath_chain)); chainp->ncp = ncp; chainp->nchain_next = NULL; if (np_sessionp->ncp_list == NULL) { np_sessionp->ncp_list = chainp; } else { np_sessionp->ncp_list->nchain_next = chainp; } return (ncp); } /* couldn't find this token in the database; go to next one. */ } return (NULL); } /* * endnetpath() may be called to unbind NETPATH when processing is complete, * releasing resources for reuse. It returns 0 on success and -1 on failure * (e.g. if setnetpath() was not called previously). */ int -endnetpath(handlep) - void *handlep; +endnetpath(void *handlep) { struct netpath_vars *np_sessionp = (struct netpath_vars *)handlep; struct netpath_chain *chainp, *lastp; if (np_sessionp == NULL || np_sessionp->valid != NP_VALID) { errno = EINVAL; return (-1); } if (np_sessionp->nc_handlep != NULL) endnetconfig(np_sessionp->nc_handlep); if (np_sessionp->netpath_start != NULL) free(np_sessionp->netpath_start); for (chainp = np_sessionp->ncp_list; chainp != NULL; lastp=chainp, chainp=chainp->nchain_next, free(lastp)) { freenetconfigent(chainp->ncp); } free(np_sessionp); #ifdef MEM_CHK if (malloc_verify() == 0) { fprintf(stderr, "memory heap corrupted in endnetpath\n"); exit(1); } #endif return (0); } /* * Returns pointer to the rest-of-the-string after the current token. * The token itself starts at npp, and we null-terminate it. We return NULL * if either npp is empty, or if this is the last token. + * + * npp - string + * token - char to parse string for */ - char * -_get_next_token(npp, token) -char *npp; /* string */ -int token; /* char to parse string for */ +_get_next_token(char *npp, int token) { char *cp; /* char pointer */ char *np; /* netpath pointer */ char *ep; /* escape pointer */ if ((cp = strchr(npp, token)) == NULL) { return (NULL); } /* * did find a token, but it might be escaped. */ if ((cp > npp) && (cp[-1] == '\\')) { /* if slash was also escaped, carry on, otherwise find next token */ if ((cp > npp + 1) && (cp[-2] != '\\')) { /* shift r-o-s onto the escaped token */ strcpy(&cp[-1], cp); /* XXX: overlapping string copy */ /* * Do a recursive call. * We don't know how many escaped tokens there might be.
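 * For example, with the hypothetical input "tcp\:udp:raw" and token
 * ':', the escaped "\:" is first collapsed to a plain ':', the call
 * recurses past it, and the split finally happens at the unescaped
 * ':': the token "tcp:udp" is null-terminated in place and a pointer
 * to "raw" is returned.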
*/ return (_get_next_token(cp, token)); } } *cp++ = '\0'; /* null-terminate token */ /* get rid of any backslash escapes */ ep = npp; while ((np = strchr(ep, '\\')) != 0) { if (np[1] == '\\') np++; strcpy(np, (ep = &np[1])); /* XXX: overlapping string copy */ } return (cp); /* return ptr to r-o-s */ } Index: projects/clang370-import/lib/libc/rpc/getrpcent.c =================================================================== --- projects/clang370-import/lib/libc/rpc/getrpcent.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/getrpcent.c (revision 288126) @@ -1,1046 +1,1046 @@ /* $NetBSD: getrpcent.c,v 1.17 2000/01/22 22:19:17 mycroft Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid = "@(#)getrpcent.c 1.14 91/03/11 Copyr 1984 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); /* * Copyright (c) 1984 by Sun Microsystems, Inc. 
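 *
 * For reference, an rpc(5) database line has the form
 *	name  program-number  aliases...
 * so a typical entry such as
 *	rpcbind 100000 portmap sunrpc
 * is unpacked by rpcent_unpack() below into r_name "rpcbind",
 * r_number 100000 and r_aliases { "portmap", "sunrpc", NULL }.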
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef YP #include #include #endif #include #include "namespace.h" #include "reentrant.h" #include "un-namespace.h" #include "libc_private.h" #include "nss_tls.h" #ifdef NS_CACHING #include "nscache.h" #endif #define RPCDB "/etc/rpc" /* nsswitch declarations */ enum constants { SETRPCENT = 1, ENDRPCENT = 2, RPCENT_STORAGE_INITIAL = 1 << 10, /* 1 KByte */ RPCENT_STORAGE_MAX = 1 << 20, /* 1 MByte */ }; static const ns_src defaultsrc[] = { { NSSRC_FILES, NS_SUCCESS }, #ifdef YP { NSSRC_NIS, NS_SUCCESS }, #endif { NULL, 0 } }; /* files backend declarations */ struct files_state { FILE *fp; int stayopen; }; static int files_rpcent(void *, void *, va_list); static int files_setrpcent(void *, void *, va_list); static void files_endstate(void *); NSS_TLS_HANDLING(files); /* nis backend declarations */ #ifdef YP struct nis_state { char domain[MAXHOSTNAMELEN]; char *current; int currentlen; int stepping; int no_name_map; }; static int nis_rpcent(void *, void *, va_list); static int nis_setrpcent(void *, void *, va_list); static void nis_endstate(void *); NSS_TLS_HANDLING(nis); #endif /* get** wrappers for get**_r functions declarations */ struct rpcent_state { struct rpcent rpc; char *buffer; size_t bufsize; }; static void rpcent_endstate(void *); NSS_TLS_HANDLING(rpcent); union key { const char *name; int number; }; static int wrap_getrpcbyname_r(union key, struct rpcent *, char *, size_t, struct rpcent **); static int wrap_getrpcbynumber_r(union key, struct rpcent *, char *, size_t, struct rpcent **); static int wrap_getrpcent_r(union key, struct rpcent *, char *, size_t, struct rpcent **); static struct rpcent *getrpc(int (*fn)(union key, struct rpcent *, char *, size_t, struct rpcent **), union key); #ifdef NS_CACHING static int rpc_id_func(char *, size_t *, va_list, void *); static int rpc_marshal_func(char *, size_t *, void *, va_list, void *); static int rpc_unmarshal_func(char *, size_t, void *, va_list, void *); #endif static int rpcent_unpack(char *p, struct rpcent *rpc, char **r_aliases, size_t aliases_size, int *errnop) { char *cp, **q; assert(p != NULL); if (*p == '#') return (-1); cp = strpbrk(p, "#\n"); if (cp == NULL) return (-1); *cp = '\0'; cp = strpbrk(p, " \t"); if (cp == NULL) return (-1); *cp++ = '\0'; /* THIS STUFF IS INTERNET SPECIFIC */ rpc->r_name = p; while (*cp == ' ' || *cp == '\t') cp++; rpc->r_number = atoi(cp); q = rpc->r_aliases = r_aliases; cp = strpbrk(cp, " \t"); if (cp != NULL) *cp++ = '\0'; while (cp && *cp) { if (*cp == ' ' || *cp == '\t') { cp++; continue; } if (q < &(r_aliases[aliases_size - 1])) *q++ = cp; else { *errnop = ERANGE; return -1; } cp = strpbrk(cp, " \t"); if (cp != NULL) *cp++ = '\0'; } *q = NULL; return 0; } /* files backend implementation */ static void files_endstate(void *p) { FILE * f; if (p == NULL) return; f = ((struct files_state *)p)->fp; if (f != NULL) fclose(f); free(p); } static int files_rpcent(void *retval, void *mdata, va_list ap) { char *name; int number; struct rpcent *rpc; char *buffer; size_t bufsize; int *errnop; char *line; size_t linesize; char **aliases; int aliases_size; char **rp; struct files_state *st; int rv; int stayopen; enum nss_lookup_type how; how = (enum nss_lookup_type)mdata; switch (how) { case nss_lt_name: name = va_arg(ap, char *); break; case nss_lt_id: number = va_arg(ap, int); break; case nss_lt_all: break; default: return (NS_NOTFOUND); } rpc = va_arg(ap, struct rpcent *); buffer = 
va_arg(ap, char *); bufsize = va_arg(ap, size_t); errnop = va_arg(ap, int *); *errnop = files_getstate(&st); if (*errnop != 0) return (NS_UNAVAIL); if (st->fp == NULL && (st->fp = fopen(RPCDB, "r")) == NULL) { *errnop = errno; return (NS_UNAVAIL); } if (how == nss_lt_all) stayopen = 1; else { rewind(st->fp); stayopen = st->stayopen; } do { if ((line = fgetln(st->fp, &linesize)) == NULL) { *errnop = errno; rv = NS_RETURN; break; } if (bufsize <= linesize + _ALIGNBYTES + sizeof(char *)) { *errnop = ERANGE; rv = NS_RETURN; break; } aliases = (char **)_ALIGN(&buffer[linesize+1]); aliases_size = (buffer + bufsize - (char *)aliases)/sizeof(char *); if (aliases_size < 1) { *errnop = ERANGE; rv = NS_RETURN; break; } memcpy(buffer, line, linesize); buffer[linesize] = '\0'; rv = rpcent_unpack(buffer, rpc, aliases, aliases_size, errnop); if (rv != 0) { if (*errnop == 0) { rv = NS_NOTFOUND; continue; } else { rv = NS_RETURN; break; } } switch (how) { case nss_lt_name: if (strcmp(rpc->r_name, name) == 0) goto done; for (rp = rpc->r_aliases; *rp != NULL; rp++) { if (strcmp(*rp, name) == 0) goto done; } rv = NS_NOTFOUND; continue; done: rv = NS_SUCCESS; break; case nss_lt_id: rv = (rpc->r_number == number) ? NS_SUCCESS : NS_NOTFOUND; break; case nss_lt_all: rv = NS_SUCCESS; break; } } while (!(rv & NS_TERMINATE)); if (!stayopen && st->fp!=NULL) { fclose(st->fp); st->fp = NULL; } if ((rv == NS_SUCCESS) && (retval != NULL)) *((struct rpcent **)retval) = rpc; return (rv); } static int files_setrpcent(void *retval, void *mdata, va_list ap) { struct files_state *st; int rv; int f; rv = files_getstate(&st); if (rv != 0) return (NS_UNAVAIL); switch ((enum constants)mdata) { case SETRPCENT: f = va_arg(ap,int); if (st->fp == NULL) st->fp = fopen(RPCDB, "r"); else rewind(st->fp); st->stayopen |= f; break; case ENDRPCENT: if (st->fp != NULL) { fclose(st->fp); st->fp = NULL; } st->stayopen = 0; break; default: break; } return (NS_UNAVAIL); } /* nis backend implementation */ #ifdef YP static void nis_endstate(void *p) { if (p == NULL) return; free(((struct nis_state *)p)->current); free(p); } static int nis_rpcent(void *retval, void *mdata, va_list ap) { char *name; int number; struct rpcent *rpc; char *buffer; size_t bufsize; int *errnop; char **rp; char **aliases; int aliases_size; char *lastkey; char *resultbuf; int resultbuflen; char buf[YPMAXRECORD + 2]; struct nis_state *st; int rv; enum nss_lookup_type how; int no_name_active; how = (enum nss_lookup_type)mdata; switch (how) { case nss_lt_name: name = va_arg(ap, char *); break; case nss_lt_id: number = va_arg(ap, int); break; case nss_lt_all: break; default: return (NS_NOTFOUND); } rpc = va_arg(ap, struct rpcent *); buffer = va_arg(ap, char *); bufsize = va_arg(ap, size_t); errnop = va_arg(ap, int *); *errnop = nis_getstate(&st); if (*errnop != 0) return (NS_UNAVAIL); if (st->domain[0] == '\0') { if (getdomainname(st->domain, sizeof(st->domain)) != 0) { *errnop = errno; return (NS_UNAVAIL); } } no_name_active = 0; do { switch (how) { case nss_lt_name: if (!st->no_name_map) { snprintf(buf, sizeof buf, "%s", name); rv = yp_match(st->domain, "rpc.byname", buf, strlen(buf), &resultbuf, &resultbuflen); switch (rv) { case 0: break; case YPERR_MAP: st->stepping = 0; no_name_active = 1; how = nss_lt_all; rv = NS_NOTFOUND; continue; default: rv = NS_NOTFOUND; goto fin; } } else { st->stepping = 0; no_name_active = 1; how = nss_lt_all; rv = NS_NOTFOUND; continue; } break; case nss_lt_id: snprintf(buf, sizeof buf, "%d", number); if (yp_match(st->domain, "rpc.bynumber", 
buf, strlen(buf), &resultbuf, &resultbuflen)) { rv = NS_NOTFOUND; goto fin; } break; case nss_lt_all: if (!st->stepping) { rv = yp_first(st->domain, "rpc.bynumber", &st->current, &st->currentlen, &resultbuf, &resultbuflen); if (rv) { rv = NS_NOTFOUND; goto fin; } st->stepping = 1; } else { lastkey = st->current; rv = yp_next(st->domain, "rpc.bynumber", st->current, st->currentlen, &st->current, &st->currentlen, &resultbuf, &resultbuflen); free(lastkey); if (rv) { st->stepping = 0; rv = NS_NOTFOUND; goto fin; } } break; } /* we need room for an additional \n character */ if (bufsize <= resultbuflen + 1 + _ALIGNBYTES + sizeof(char *)) { *errnop = ERANGE; rv = NS_RETURN; break; } aliases=(char **)_ALIGN(&buffer[resultbuflen+2]); aliases_size = (buffer + bufsize - (char *)aliases) / sizeof(char *); if (aliases_size < 1) { *errnop = ERANGE; rv = NS_RETURN; break; } /* * rpcent_unpack expects lines terminated with \n -- make it happy */ memcpy(buffer, resultbuf, resultbuflen); buffer[resultbuflen] = '\n'; buffer[resultbuflen+1] = '\0'; free(resultbuf); if (rpcent_unpack(buffer, rpc, aliases, aliases_size, errnop) != 0) { if (*errnop == 0) rv = NS_NOTFOUND; else rv = NS_RETURN; } else { if ((how == nss_lt_all) && (no_name_active != 0)) { if (strcmp(rpc->r_name, name) == 0) goto done; for (rp = rpc->r_aliases; *rp != NULL; rp++) { if (strcmp(*rp, name) == 0) goto done; } rv = NS_NOTFOUND; continue; done: rv = NS_SUCCESS; } else rv = NS_SUCCESS; } } while (!(rv & NS_TERMINATE) && (how == nss_lt_all)); fin: if ((rv == NS_SUCCESS) && (retval != NULL)) *((struct rpcent **)retval) = rpc; return (rv); } static int nis_setrpcent(void *retval, void *mdata, va_list ap) { struct nis_state *st; int rv; rv = nis_getstate(&st); if (rv != 0) return (NS_UNAVAIL); switch ((enum constants)mdata) { case SETRPCENT: case ENDRPCENT: free(st->current); st->current = NULL; st->stepping = 0; break; default: break; } return (NS_UNAVAIL); } #endif #ifdef NS_CACHING static int rpc_id_func(char *buffer, size_t *buffer_size, va_list ap, void *cache_mdata) { char *name; int rpc; size_t desired_size, size; enum nss_lookup_type lookup_type; int res = NS_UNAVAIL; lookup_type = (enum nss_lookup_type)cache_mdata; switch (lookup_type) { case nss_lt_name: name = va_arg(ap, char *); size = strlen(name); desired_size = sizeof(enum nss_lookup_type) + size + 1; if (desired_size > *buffer_size) { res = NS_RETURN; goto fin; } memcpy(buffer, &lookup_type, sizeof(enum nss_lookup_type)); memcpy(buffer + sizeof(enum nss_lookup_type), name, size + 1); res = NS_SUCCESS; break; case nss_lt_id: rpc = va_arg(ap, int); desired_size = sizeof(enum nss_lookup_type) + sizeof(int); if (desired_size > *buffer_size) { res = NS_RETURN; goto fin; } memcpy(buffer, &lookup_type, sizeof(enum nss_lookup_type)); memcpy(buffer + sizeof(enum nss_lookup_type), &rpc, sizeof(int)); res = NS_SUCCESS; break; default: /* should be unreachable */ return (NS_UNAVAIL); } fin: *buffer_size = desired_size; return (res); } static int rpc_marshal_func(char *buffer, size_t *buffer_size, void *retval, va_list ap, void *cache_mdata) { char *name; int num; struct rpcent *rpc; char *orig_buf; size_t orig_buf_size; struct rpcent new_rpc; size_t desired_size, size, aliases_size; char *p; char **alias; switch ((enum nss_lookup_type)cache_mdata) { case nss_lt_name: name = va_arg(ap, char *); break; case nss_lt_id: num = va_arg(ap, int); break; case nss_lt_all: break; default: /* should be unreachable */ return (NS_UNAVAIL); } rpc = va_arg(ap, struct rpcent *); orig_buf = va_arg(ap, char *);
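	/*
	 * Layout of the serialized entry built below: the struct rpcent
	 * itself, then the pre-alignment value of the string cursor p
	 * (rpc_unmarshal_func() reads it back to recompute the _ALIGN()
	 * offsets), then the r_name string, the NULL-terminated alias
	 * pointer array, and finally the alias strings themselves.
	 */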
orig_buf_size = va_arg(ap, size_t); desired_size = _ALIGNBYTES + sizeof(struct rpcent) + sizeof(char *); if (rpc->r_name != NULL) desired_size += strlen(rpc->r_name) + 1; if (rpc->r_aliases != NULL) { aliases_size = 0; for (alias = rpc->r_aliases; *alias; ++alias) { desired_size += strlen(*alias) + 1; ++aliases_size; } desired_size += _ALIGNBYTES + (aliases_size + 1) * sizeof(char *); } if (*buffer_size < desired_size) { /* this assignment is here for future use */ *buffer_size = desired_size; return (NS_RETURN); } new_rpc = *rpc; *buffer_size = desired_size; memset(buffer, 0, desired_size); p = buffer + sizeof(struct rpcent) + sizeof(char *); memcpy(buffer + sizeof(struct rpcent), &p, sizeof(char *)); p = (char *)_ALIGN(p); if (new_rpc.r_name != NULL) { size = strlen(new_rpc.r_name); memcpy(p, new_rpc.r_name, size); new_rpc.r_name = p; p += size + 1; } if (new_rpc.r_aliases != NULL) { p = (char *)_ALIGN(p); memcpy(p, new_rpc.r_aliases, sizeof(char *) * aliases_size); new_rpc.r_aliases = (char **)p; p += sizeof(char *) * (aliases_size + 1); for (alias = new_rpc.r_aliases; *alias; ++alias) { size = strlen(*alias); memcpy(p, *alias, size); *alias = p; p += size + 1; } } memcpy(buffer, &new_rpc, sizeof(struct rpcent)); return (NS_SUCCESS); } static int rpc_unmarshal_func(char *buffer, size_t buffer_size, void *retval, va_list ap, void *cache_mdata) { char *name; int num; struct rpcent *rpc; char *orig_buf; size_t orig_buf_size; int *ret_errno; char *p; char **alias; switch ((enum nss_lookup_type)cache_mdata) { case nss_lt_name: name = va_arg(ap, char *); break; case nss_lt_id: num = va_arg(ap, int); break; case nss_lt_all: break; default: /* should be unreachable */ return (NS_UNAVAIL); } rpc = va_arg(ap, struct rpcent *); orig_buf = va_arg(ap, char *); orig_buf_size = va_arg(ap, size_t); ret_errno = va_arg(ap, int *); if (orig_buf_size < buffer_size - sizeof(struct rpcent) - sizeof(char *)) { *ret_errno = ERANGE; return (NS_RETURN); } memcpy(rpc, buffer, sizeof(struct rpcent)); memcpy(&p, buffer + sizeof(struct rpcent), sizeof(char *)); orig_buf = (char *)_ALIGN(orig_buf); memcpy(orig_buf, buffer + sizeof(struct rpcent) + sizeof(char *) + _ALIGN(p) - (size_t)p, buffer_size - sizeof(struct rpcent) - sizeof(char *) - _ALIGN(p) + (size_t)p); p = (char *)_ALIGN(p); NS_APPLY_OFFSET(rpc->r_name, orig_buf, p, char *); if (rpc->r_aliases != NULL) { NS_APPLY_OFFSET(rpc->r_aliases, orig_buf, p, char **); for (alias = rpc->r_aliases ; *alias; ++alias) NS_APPLY_OFFSET(*alias, orig_buf, p, char *); } if (retval != NULL) *((struct rpcent **)retval) = rpc; return (NS_SUCCESS); } NSS_MP_CACHE_HANDLING(rpc); #endif /* NS_CACHING */ /* get**_r functions implementation */ static int getrpcbyname_r(const char *name, struct rpcent *rpc, char *buffer, size_t bufsize, struct rpcent **result) { #ifdef NS_CACHING static const nss_cache_info cache_info = NS_COMMON_CACHE_INFO_INITIALIZER( rpc, (void *)nss_lt_name, rpc_id_func, rpc_marshal_func, rpc_unmarshal_func); #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_rpcent, (void *)nss_lt_name }, #ifdef YP { NSSRC_NIS, nis_rpcent, (void *)nss_lt_name }, #endif #ifdef NS_CACHING NS_CACHE_CB(&cache_info) #endif { NULL, NULL, NULL } }; int rv, ret_errno; ret_errno = 0; *result = NULL; rv = nsdispatch(result, dtab, NSDB_RPC, "getrpcbyname_r", defaultsrc, name, rpc, buffer, bufsize, &ret_errno); if (rv == NS_SUCCESS) return (0); else return (ret_errno); } static int getrpcbynumber_r(int number, struct rpcent *rpc, char *buffer, size_t bufsize, struct rpcent 
**result) { #ifdef NS_CACHING static const nss_cache_info cache_info = NS_COMMON_CACHE_INFO_INITIALIZER( rpc, (void *)nss_lt_id, rpc_id_func, rpc_marshal_func, rpc_unmarshal_func); #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_rpcent, (void *)nss_lt_id }, #ifdef YP { NSSRC_NIS, nis_rpcent, (void *)nss_lt_id }, #endif #ifdef NS_CACHING NS_CACHE_CB(&cache_info) #endif { NULL, NULL, NULL } }; int rv, ret_errno; ret_errno = 0; *result = NULL; rv = nsdispatch(result, dtab, NSDB_RPC, "getrpcbynumber_r", defaultsrc, number, rpc, buffer, bufsize, &ret_errno); if (rv == NS_SUCCESS) return (0); else return (ret_errno); } static int getrpcent_r(struct rpcent *rpc, char *buffer, size_t bufsize, struct rpcent **result) { #ifdef NS_CACHING static const nss_cache_info cache_info = NS_MP_CACHE_INFO_INITIALIZER( rpc, (void *)nss_lt_all, rpc_marshal_func, rpc_unmarshal_func); #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_rpcent, (void *)nss_lt_all }, #ifdef YP { NSSRC_NIS, nis_rpcent, (void *)nss_lt_all }, #endif #ifdef NS_CACHING NS_CACHE_CB(&cache_info) #endif { NULL, NULL, NULL } }; int rv, ret_errno; ret_errno = 0; *result = NULL; rv = nsdispatch(result, dtab, NSDB_RPC, "getrpcent_r", defaultsrc, rpc, buffer, bufsize, &ret_errno); if (rv == NS_SUCCESS) return (0); else return (ret_errno); } /* get** wrappers for get**_r functions implementation */ static void rpcent_endstate(void *p) { if (p == NULL) return; free(((struct rpcent_state *)p)->buffer); free(p); } static int wrap_getrpcbyname_r(union key key, struct rpcent *rpc, char *buffer, size_t bufsize, struct rpcent **res) { return (getrpcbyname_r(key.name, rpc, buffer, bufsize, res)); } static int wrap_getrpcbynumber_r(union key key, struct rpcent *rpc, char *buffer, size_t bufsize, struct rpcent **res) { return (getrpcbynumber_r(key.number, rpc, buffer, bufsize, res)); } static int wrap_getrpcent_r(union key key __unused, struct rpcent *rpc, char *buffer, size_t bufsize, struct rpcent **res) { return (getrpcent_r(rpc, buffer, bufsize, res)); } static struct rpcent * getrpc(int (*fn)(union key, struct rpcent *, char *, size_t, struct rpcent **), union key key) { int rv; struct rpcent *res; struct rpcent_state * st; rv=rpcent_getstate(&st); if (rv != 0) { errno = rv; return NULL; } if (st->buffer == NULL) { st->buffer = malloc(RPCENT_STORAGE_INITIAL); if (st->buffer == NULL) return (NULL); st->bufsize = RPCENT_STORAGE_INITIAL; } do { rv = fn(key, &st->rpc, st->buffer, st->bufsize, &res); if (res == NULL && rv == ERANGE) { free(st->buffer); if ((st->bufsize << 1) > RPCENT_STORAGE_MAX) { st->buffer = NULL; errno = ERANGE; return (NULL); } st->bufsize <<= 1; st->buffer = malloc(st->bufsize); if (st->buffer == NULL) return (NULL); } } while (res == NULL && rv == ERANGE); if (rv != 0) errno = rv; return (res); } struct rpcent * getrpcbyname(char *name) { union key key; key.name = name; return (getrpc(wrap_getrpcbyname_r, key)); } struct rpcent * getrpcbynumber(int number) { union key key; key.number = number; return (getrpc(wrap_getrpcbynumber_r, key)); } struct rpcent * -getrpcent() +getrpcent(void) { union key key; key.number = 0; /* not used */ return (getrpc(wrap_getrpcent_r, key)); } void setrpcent(int stayopen) { #ifdef NS_CACHING static const nss_cache_info cache_info = NS_MP_CACHE_INFO_INITIALIZER( rpc, (void *)nss_lt_all, NULL, NULL); #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_setrpcent, (void *)SETRPCENT }, #ifdef YP { NSSRC_NIS, nis_setrpcent, (void *)SETRPCENT }, #endif #ifdef NS_CACHING 
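	/* when caching is compiled in, the shared nss cache is an
	   additional dispatch source */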
NS_CACHE_CB(&cache_info) #endif { NULL, NULL, NULL } }; (void)nsdispatch(NULL, dtab, NSDB_RPC, "setrpcent", defaultsrc, stayopen); } void -endrpcent() +endrpcent(void) { #ifdef NS_CACHING static const nss_cache_info cache_info = NS_MP_CACHE_INFO_INITIALIZER( rpc, (void *)nss_lt_all, NULL, NULL); #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_setrpcent, (void *)ENDRPCENT }, #ifdef YP { NSSRC_NIS, nis_setrpcent, (void *)ENDRPCENT }, #endif #ifdef NS_CACHING NS_CACHE_CB(&cache_info) #endif { NULL, NULL, NULL } }; (void)nsdispatch(NULL, dtab, NSDB_RPC, "endrpcent", defaultsrc); } Index: projects/clang370-import/lib/libc/rpc/getrpcport.c =================================================================== --- projects/clang370-import/lib/libc/rpc/getrpcport.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/getrpcport.c (revision 288126) @@ -1,77 +1,75 @@ /* $NetBSD: getrpcport.c,v 1.16 2000/01/22 22:19:18 mycroft Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)getrpcport.c 1.3 87/08/11 SMI"; static char *sccsid = "@(#)getrpcport.c 2.1 88/07/29 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * Copyright (c) 1985 by Sun Microsystems, Inc. */ #include "namespace.h" #include #include #include #include #include #include #include #include #include "un-namespace.h" int -getrpcport(host, prognum, versnum, proto) - char *host; - int prognum, versnum, proto; +getrpcport(char *host, int prognum, int versnum, int proto) { struct sockaddr_in addr; struct hostent *hp; assert(host != NULL); if ((hp = gethostbyname(host)) == NULL) return (0); memset(&addr, 0, sizeof(addr)); addr.sin_len = sizeof(struct sockaddr_in); addr.sin_family = AF_INET; addr.sin_port = 0; if (hp->h_length > addr.sin_len) hp->h_length = addr.sin_len; memcpy(&addr.sin_addr.s_addr, hp->h_addr, (size_t)hp->h_length); /* Inconsistent interfaces need casts! 
:-( */ return (pmap_getport(&addr, (u_long)prognum, (u_long)versnum, (u_int)proto)); } Index: projects/clang370-import/lib/libc/rpc/mt_misc.c =================================================================== --- projects/clang370-import/lib/libc/rpc/mt_misc.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/mt_misc.c (revision 288126) @@ -1,117 +1,117 @@ /* $NetBSD: mt_misc.c,v 1.1 2000/06/02 23:11:11 fvdl Exp $ */ /* #pragma ident "@(#)mt_misc.c 1.24 93/04/29 SMI" */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include "reentrant.h" #include #include #include #include #include "un-namespace.h" #include "mt_misc.h" /* Take these objects out of the application namespace. */ #define svc_lock __svc_lock #define svc_fd_lock __svc_fd_lock #define rpcbaddr_cache_lock __rpcbaddr_cache_lock #define authdes_ops_lock __authdes_ops_lock #define authnone_lock __authnone_lock #define authsvc_lock __authsvc_lock #define clnt_fd_lock __clnt_fd_lock #define clntraw_lock __clntraw_lock #define dupreq_lock __dupreq_lock #define loopnconf_lock __loopnconf_lock #define ops_lock __ops_lock #define proglst_lock __proglst_lock #define rpcsoc_lock __rpcsoc_lock #define svcraw_lock __svcraw_lock #define xprtlist_lock __xprtlist_lock /* protects the services list (svc.c) */ pthread_rwlock_t svc_lock = PTHREAD_RWLOCK_INITIALIZER; /* protects svc_fdset and the xports[] array */ pthread_rwlock_t svc_fd_lock = PTHREAD_RWLOCK_INITIALIZER; /* protects the RPCBIND address cache */ pthread_rwlock_t rpcbaddr_cache_lock = PTHREAD_RWLOCK_INITIALIZER; /* serializes authdes ops initializations */ pthread_mutex_t authdes_ops_lock = PTHREAD_MUTEX_INITIALIZER; /* protects des stats list */ pthread_mutex_t svcauthdesstats_lock = PTHREAD_MUTEX_INITIALIZER; /* auth_none.c serialization */ pthread_mutex_t authnone_lock = PTHREAD_MUTEX_INITIALIZER; /* protects the Auths list (svc_auth.c) */ pthread_mutex_t authsvc_lock = PTHREAD_MUTEX_INITIALIZER; /* protects client-side fd lock array */ pthread_mutex_t clnt_fd_lock = PTHREAD_MUTEX_INITIALIZER; /* clnt_raw.c serialization */ pthread_mutex_t clntraw_lock = PTHREAD_MUTEX_INITIALIZER; /* dupreq variables (svc_dg.c) */ pthread_mutex_t dupreq_lock = PTHREAD_MUTEX_INITIALIZER; /* loopnconf (rpcb_clnt.c) */ pthread_mutex_t loopnconf_lock = PTHREAD_MUTEX_INITIALIZER; /* serializes ops initializations */ pthread_mutex_t ops_lock = PTHREAD_MUTEX_INITIALIZER; /* protects proglst list (svc_simple.c) */ pthread_mutex_t proglst_lock = PTHREAD_MUTEX_INITIALIZER; /* serializes clnt_com_create() (rpc_soc.c) */ pthread_mutex_t rpcsoc_lock = PTHREAD_MUTEX_INITIALIZER; /* svc_raw.c serialization */ pthread_mutex_t svcraw_lock = PTHREAD_MUTEX_INITIALIZER; /* xprtlist (svc_generic.c) */ pthread_mutex_t xprtlist_lock = PTHREAD_MUTEX_INITIALIZER; #undef rpc_createerr struct rpc_createerr rpc_createerr; static thread_key_t rce_key; static once_t rce_once = ONCE_INITIALIZER; static int rce_key_error; static void rce_key_init(void) { rce_key_error = thr_keycreate(&rce_key, free); } struct rpc_createerr * -__rpc_createerr() +__rpc_createerr(void) { struct rpc_createerr *rce_addr = 0; if (thr_main()) return (&rpc_createerr); if (thr_once(&rce_once, rce_key_init) != 0 || rce_key_error != 0) return (&rpc_createerr); rce_addr = (struct rpc_createerr *)thr_getspecific(rce_key); if (!rce_addr) { rce_addr = (struct rpc_createerr *) malloc(sizeof (struct rpc_createerr)); if (thr_setspecific(rce_key, (void *) rce_addr) != 0) { if (rce_addr) free(rce_addr); return (&rpc_createerr); } 
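	/*
	 * First call from this thread: zero the freshly allocated
	 * per-thread rpc_createerr before handing it back; subsequent
	 * calls find the same buffer again via thr_getspecific().
	 */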
memset(rce_addr, 0, sizeof (struct rpc_createerr)); return (rce_addr); } return (rce_addr); } Index: projects/clang370-import/lib/libc/rpc/netname.c =================================================================== --- projects/clang370-import/lib/libc/rpc/netname.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/netname.c (revision 288126) @@ -1,148 +1,141 @@ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)netname.c 1.8 91/03/11 Copyr 1986 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); /* * netname utility routines * convert from unix names to network names and vice-versa * This module is operating system dependent! * What we define here will work with any unix system that has adopted * the sun NIS domain architecture. */ #include "namespace.h" #include #include #include #ifdef YP #include #include #endif #include #include #include #include #include #include #include "un-namespace.h" #ifndef MAXHOSTNAMELEN #define MAXHOSTNAMELEN 256 #endif #define TYPE_BIT(type) (sizeof (type) * CHAR_BIT) #define TYPE_SIGNED(type) (((type) -1) < 0) /* ** 302 / 1000 is log10(2.0) rounded up. ** Subtract one for the sign bit if the type is signed; ** add one for integer division truncation; ** add one more for a minus sign if the type is signed. 
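** For example, for a signed 32-bit type this yields
** (32 - 1) * 302 / 1000 + 1 + 1 = 11, exactly enough for the eleven
** characters of "-2147483648".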
*/ #define INT_STRLEN_MAXIMUM(type) \ ((TYPE_BIT(type) - TYPE_SIGNED(type)) * 302 / 1000 + 1 + TYPE_SIGNED(type)) static char *OPSYS = "unix"; /* * Figure out my fully qualified network name */ int -getnetname(name) - char name[MAXNETNAMELEN+1]; +getnetname(char name[MAXNETNAMELEN+1]) { uid_t uid; uid = geteuid(); if (uid == 0) { return (host2netname(name, (char *) NULL, (char *) NULL)); } else { return (user2netname(name, uid, (char *) NULL)); } } /* * Convert unix cred to network-name */ int -user2netname(netname, uid, domain) - char netname[MAXNETNAMELEN + 1]; - const uid_t uid; - const char *domain; +user2netname(char netname[MAXNETNAMELEN + 1], const uid_t uid, const char *domain) { char *dfltdom; if (domain == NULL) { if (__rpc_get_default_domain(&dfltdom) != 0) { return (0); } domain = dfltdom; } if (strlen(domain) + 1 + INT_STRLEN_MAXIMUM(u_long) + 1 + strlen(OPSYS) > MAXNETNAMELEN) { return (0); } (void) sprintf(netname, "%s.%ld@%s", OPSYS, (u_long)uid, domain); return (1); } /* * Convert host to network-name */ int -host2netname(netname, host, domain) - char netname[MAXNETNAMELEN + 1]; - const char *host; - const char *domain; +host2netname(char netname[MAXNETNAMELEN + 1], const char *host, const char *domain) { char *dfltdom; char hostname[MAXHOSTNAMELEN+1]; if (domain == NULL) { if (__rpc_get_default_domain(&dfltdom) != 0) { return (0); } domain = dfltdom; } if (host == NULL) { (void) gethostname(hostname, sizeof(hostname)); host = hostname; } if (strlen(domain) + 1 + strlen(host) + 1 + strlen(OPSYS) > MAXNETNAMELEN) { return (0); } (void) sprintf(netname, "%s.%s@%s", OPSYS, host, domain); return (1); } Index: projects/clang370-import/lib/libc/rpc/netnamer.c =================================================================== --- projects/clang370-import/lib/libc/rpc/netnamer.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/netnamer.c (revision 288126) @@ -1,329 +1,319 @@ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)netnamer.c 1.13 91/03/11 Copyr 1986 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); /* * netname utility routines convert from unix names to network names and * vice-versa This module is operating system dependent! What we define here * will work with any unix system that has adopted the sun NIS domain * architecture. */ #include "namespace.h" #include #include #include #ifdef YP #include #include #endif #include #include #include #include #include #include #include #include "un-namespace.h" static char *OPSYS = "unix"; #ifdef YP static char *NETID = "netid.byname"; #endif static char *NETIDFILE = "/etc/netid"; static int getnetid( char *, char * ); static int _getgroups( char *, gid_t * ); /* * Convert network-name into unix credential */ int -netname2user(netname, uidp, gidp, gidlenp, gidlist) - char netname[MAXNETNAMELEN + 1]; - uid_t *uidp; - gid_t *gidp; - int *gidlenp; - gid_t *gidlist; +netname2user(char netname[MAXNETNAMELEN + 1], uid_t *uidp, gid_t *gidp, + int *gidlenp, gid_t *gidlist) { char *p; int gidlen; uid_t uid; long luid; struct passwd *pwd; char val[1024]; char *val1, *val2; char *domain; int vallen; int err; if (getnetid(netname, val)) { char *res = val; p = strsep(&res, ":"); if (p == NULL) return (0); *uidp = (uid_t) atol(p); p = strsep(&res, "\n,"); if (p == NULL) { return (0); } *gidp = (gid_t) atol(p); for (gidlen = 0; gidlen < NGRPS; gidlen++) { p = strsep(&res, "\n,"); if (p == NULL) break; gidlist[gidlen] = (gid_t) atol(p); } *gidlenp = gidlen; return (1); } val1 = strchr(netname, '.'); if (val1 == NULL) return (0); if (strncmp(netname, OPSYS, (val1-netname))) return (0); val1++; val2 = strchr(val1, '@'); if (val2 == NULL) return (0); vallen = val2 - val1; if (vallen > (1024 - 1)) vallen = 1024 - 1; (void) strncpy(val, val1, 1024); val[vallen] = 0; err = __rpc_get_default_domain(&domain); /* change to rpc */ if (err) return (0); if (strcmp(val2 + 1, domain)) return (0); /* wrong domain */ if (sscanf(val, "%ld", &luid) != 1) return (0); uid = luid; /* use initgroups method */ pwd = getpwuid(uid); if (pwd == NULL) return (0); *uidp = pwd->pw_uid; *gidp = pwd->pw_gid; *gidlenp = _getgroups(pwd->pw_name, gidlist); return (1); } /* * initgroups */ static int -_getgroups(uname, groups) - char *uname; - gid_t groups[NGRPS]; +_getgroups(char *uname, gid_t groups[NGRPS]) { gid_t ngroups = 0; struct group *grp; int i; int j; int filter; setgrent(); while ((grp = getgrent())) { for (i = 0; grp->gr_mem[i]; i++) if (!strcmp(grp->gr_mem[i], uname)) { if (ngroups == NGRPS) { #ifdef DEBUG fprintf(stderr, "initgroups: %s is in too many groups\n", uname); #endif goto toomany; } /* filter out duplicate group entries */ filter = 0; for (j = 0; j < ngroups; j++) if (groups[j] == grp->gr_gid) { filter++; break; } if (!filter) groups[ngroups++] = grp->gr_gid; } } toomany: endgrent(); return (ngroups); } /* * Convert network-name to hostname */ int -netname2host(netname, hostname, hostlen) - char netname[MAXNETNAMELEN + 1]; - char *hostname; - int hostlen; +netname2host(char netname[MAXNETNAMELEN + 1], char *hostname, int hostlen) { int err; char valbuf[1024]; char *val; char *val2; int vallen; char *domain; if (getnetid(netname, valbuf)) { val = valbuf; if ((*val == '0') && (val[1] == ':')) { (void) strncpy(hostname, val + 2, hostlen); return (1); } } val = strchr(netname, '.'); if (val == NULL) return (0); if (strncmp(netname, OPSYS, (val - netname))) return (0); val++; val2 = strchr(val, '@'); if (val2 == 
NULL) return (0); vallen = val2 - val; if (vallen > (hostlen - 1)) vallen = hostlen - 1; (void) strncpy(hostname, val, vallen); hostname[vallen] = 0; err = __rpc_get_default_domain(&domain); /* change to rpc */ if (err) return (0); if (strcmp(val2 + 1, domain)) return (0); /* wrong domain */ else return (1); } /* * reads the file /etc/netid looking for a + to optionally go to the * network information service. */ int -getnetid(key, ret) - char *key, *ret; +getnetid(char *key, char *ret) { char buf[1024]; /* big enough */ char *res; char *mkey; char *mval; FILE *fd; #ifdef YP char *domain; int err; char *lookup; int len; #endif fd = fopen(NETIDFILE, "r"); if (fd == NULL) { #ifdef YP res = "+"; goto getnetidyp; #else return (0); #endif } for (;;) { if (fd == NULL) return (0); /* getnetidyp brings us here */ res = fgets(buf, sizeof(buf), fd); if (res == NULL) { fclose(fd); return (0); } if (res[0] == '#') continue; else if (res[0] == '+') { #ifdef YP getnetidyp: err = yp_get_default_domain(&domain); if (err) { continue; } lookup = NULL; err = yp_match(domain, NETID, key, strlen(key), &lookup, &len); if (err) { #ifdef DEBUG fprintf(stderr, "match failed error %d\n", err); #endif continue; } lookup[len] = 0; strcpy(ret, lookup); free(lookup); if (fd != NULL) fclose(fd); return (2); #else /* YP */ #ifdef DEBUG fprintf(stderr, "Bad record in %s '+' -- NIS not supported in this library copy\n", NETIDFILE); #endif continue; #endif /* YP */ } else { mkey = strsep(&res, "\t "); if (mkey == NULL) { fprintf(stderr, "Bad record in %s -- %s", NETIDFILE, buf); continue; } do { mval = strsep(&res, " \t#\n"); } while (mval != NULL && !*mval); if (mval == NULL) { fprintf(stderr, "Bad record in %s val problem - %s", NETIDFILE, buf); continue; } if (strcmp(mkey, key) == 0) { strcpy(ret, mval); fclose(fd); return (1); } } } } Index: projects/clang370-import/lib/libc/rpc/pmap_getmaps.c =================================================================== --- projects/clang370-import/lib/libc/rpc/pmap_getmaps.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/pmap_getmaps.c (revision 288126) @@ -1,99 +1,98 @@ /* $NetBSD: pmap_getmaps.c,v 1.16 2000/07/06 03:10:34 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)pmap_getmaps.c 1.10 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)pmap_getmaps.c 2.2 88/08/01 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * pmap_getmaps.c * Client interface to pmap rpc service. * contains pmap_getmaps(), which is the only tcp service involved * * Copyright (C) 1984, Sun Microsystems, Inc. */ #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #define NAMELEN 255 #define MAX_BROADCAST_SIZE 1400 /* * Get a copy of the current port maps. * Calls the pmap service remotely to get the maps. */ struct pmaplist * -pmap_getmaps(address) - struct sockaddr_in *address; +pmap_getmaps(struct sockaddr_in *address) { struct pmaplist *head = NULL; int sock = -1; struct timeval minutetimeout; CLIENT *client; assert(address != NULL); minutetimeout.tv_sec = 60; minutetimeout.tv_usec = 0; address->sin_port = htons(PMAPPORT); client = clnttcp_create(address, PMAPPROG, PMAPVERS, &sock, 50, 500); if (client != NULL) { if (CLNT_CALL(client, (rpcproc_t)PMAPPROC_DUMP, (xdrproc_t)xdr_void, NULL, (xdrproc_t)xdr_pmaplist, &head, minutetimeout) != RPC_SUCCESS) { clnt_perror(client, "pmap_getmaps rpc problem"); } CLNT_DESTROY(client); } address->sin_port = 0; return (head); } Index: projects/clang370-import/lib/libc/rpc/pmap_getport.c =================================================================== --- projects/clang370-import/lib/libc/rpc/pmap_getport.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/pmap_getport.c (revision 288126) @@ -1,103 +1,100 @@ /* $NetBSD: pmap_getport.c,v 1.16 2000/07/06 03:10:34 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "from: @(#)pmap_getport.c 1.9 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "from: @(#)pmap_getport.c 2.2 88/08/01 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * pmap_getport.c * Client interface to pmap rpc service. * * Copyright (C) 1984, Sun Microsystems, Inc. */ #include "namespace.h" #include #include #include #include #include #include #include #include #include #include "un-namespace.h" static const struct timeval timeout = { 5, 0 }; static const struct timeval tottimeout = { 60, 0 }; /* * Find the mapped port for program,version. * Calls the pmap service remotely to do the lookup. * Returns 0 if no map exists. */ u_short -pmap_getport(address, program, version, protocol) - struct sockaddr_in *address; - u_long program; - u_long version; - u_int protocol; +pmap_getport(struct sockaddr_in *address, u_long program, u_long version, + u_int protocol) { u_short port = 0; int sock = -1; CLIENT *client; struct pmap parms; assert(address != NULL); address->sin_port = htons(PMAPPORT); client = clntudp_bufcreate(address, PMAPPROG, PMAPVERS, timeout, &sock, RPCSMALLMSGSIZE, RPCSMALLMSGSIZE); if (client != NULL) { parms.pm_prog = program; parms.pm_vers = version; parms.pm_prot = protocol; parms.pm_port = 0; /* not needed or used */ if (CLNT_CALL(client, (rpcproc_t)PMAPPROC_GETPORT, (xdrproc_t)xdr_pmap, &parms, (xdrproc_t)xdr_u_short, &port, tottimeout) != RPC_SUCCESS){ rpc_createerr.cf_stat = RPC_PMAPFAILURE; clnt_geterr(client, &rpc_createerr.cf_error); } else if (port == 0) { rpc_createerr.cf_stat = RPC_PROGNOTREGISTERED; } CLNT_DESTROY(client); } address->sin_port = 0; return (port); } Index: projects/clang370-import/lib/libc/rpc/pmap_prot.c =================================================================== --- projects/clang370-import/lib/libc/rpc/pmap_prot.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/pmap_prot.c (revision 288126) @@ -1,68 +1,66 @@ /* $NetBSD: pmap_prot.c,v 1.10 2000/01/22 22:19:18 mycroft Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)pmap_prot.c 1.17 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)pmap_prot.c 2.1 88/07/29 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * pmap_prot.c * Protocol for the local binder service, or pmap. * * Copyright (C) 1984, Sun Microsystems, Inc. */ #include "namespace.h" #include #include #include #include #include "un-namespace.h" bool_t -xdr_pmap(xdrs, regs) - XDR *xdrs; - struct pmap *regs; +xdr_pmap(XDR *xdrs, struct pmap *regs) { assert(xdrs != NULL); assert(regs != NULL); if (xdr_u_long(xdrs, ®s->pm_prog) && xdr_u_long(xdrs, ®s->pm_vers) && xdr_u_long(xdrs, ®s->pm_prot)) return (xdr_u_long(xdrs, ®s->pm_port)); return (FALSE); } Index: projects/clang370-import/lib/libc/rpc/pmap_prot2.c =================================================================== --- projects/clang370-import/lib/libc/rpc/pmap_prot2.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/pmap_prot2.c (revision 288126) @@ -1,142 +1,138 @@ /* $NetBSD: pmap_prot2.c,v 1.14 2000/07/06 03:10:34 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)pmap_prot2.c 1.3 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)pmap_prot2.c 2.1 88/07/29 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * pmap_prot2.c * Protocol for the local binder service, or pmap. * * Copyright (C) 1984, Sun Microsystems, Inc. */ #include "namespace.h" #include #include #include #include #include "un-namespace.h" /* * What is going on with linked lists? (!) * First recall the linked list declaration from pmap_prot.h: * * struct pmaplist { * struct pmap pml_map; * struct pmaplist *pml_next; * }; * * Compare that declaration with a corresponding xdr declaration that * is (a) pointer-less, and (b) recursive: * * typedef union switch (bool_t) { * * case TRUE: struct { * struct pmap; * pmaplist_t foo; * }; * * case FALSE: struct {}; * } pmaplist_t; * * Notice that the xdr declaration has no next pointer while * the C declaration has no bool_t variable. The bool_t can be * interpreted as ``more data follows me''; if FALSE then nothing * follows this bool_t; if TRUE then the bool_t is followed by * an actual struct pmap, and then (recursively) by the * xdr union, pmaplist_t. * * This could be implemented via the xdr_union primitive, though this * would cause one recursive call per element in the list. Rather than do * that we can ``unwind'' the recursion * into a while loop and do the union arms in-place. * * The head of the list is what the C programmer wishes to pass around * the net, yet it is the data that the pointer points to which is interesting; * this sounds like a job for xdr_reference! */ bool_t -xdr_pmaplist(xdrs, rp) - XDR *xdrs; - struct pmaplist **rp; +xdr_pmaplist(XDR *xdrs, struct pmaplist **rp) { /* * more_elements is pre-computed in case the direction is * XDR_ENCODE or XDR_FREE. more_elements is overwritten by * xdr_bool when the direction is XDR_DECODE. */ bool_t more_elements; int freeing; struct pmaplist **next = NULL; /* pacify gcc */ assert(xdrs != NULL); assert(rp != NULL); freeing = (xdrs->x_op == XDR_FREE); for (;;) { more_elements = (bool_t)(*rp != NULL); if (! xdr_bool(xdrs, &more_elements)) return (FALSE); if (! more_elements) return (TRUE); /* we are done */ /* * the unfortunate side effect of non-recursion is that in * the case of freeing we must remember the next object * before we free the current object ... */ if (freeing) next = &((*rp)->pml_next); if (! xdr_reference(xdrs, (caddr_t *)rp, (u_int)sizeof(struct pmaplist), (xdrproc_t)xdr_pmap)) return (FALSE); rp = (freeing) ? next : &((*rp)->pml_next); } } /* * xdr_pmaplist_ptr() is specified to take a PMAPLIST *, but is identical in * functionality to xdr_pmaplist(). */ bool_t -xdr_pmaplist_ptr(xdrs, rp) - XDR *xdrs; - struct pmaplist *rp; +xdr_pmaplist_ptr(XDR *xdrs, struct pmaplist *rp) { return xdr_pmaplist(xdrs, (struct pmaplist **)(void *)rp); } Index: projects/clang370-import/lib/libc/rpc/pmap_rmt.c =================================================================== --- projects/clang370-import/lib/libc/rpc/pmap_rmt.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/pmap_rmt.c (revision 288126) @@ -1,175 +1,166 @@ /* $NetBSD: pmap_rmt.c,v 1.29 2000/07/06 03:10:34 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)pmap_rmt.c 1.21 87/08/27 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)pmap_rmt.c 2.2 88/08/01 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * pmap_rmt.c * Client interface to pmap rpc service. * remote call and broadcast service * * Copyright (C) 1984, Sun Microsystems, Inc. */ #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" static const struct timeval timeout = { 3, 0 }; /* * pmapper remote-call-service interface. * This routine is used to call the pmapper remote call service * which will look up a service program in the port maps, and then * remotely call that routine with the given parameters. This allows * programs to do a lookup and call in one step. 
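The lookup-and-call-in-one-step behavior described in the comment above can be exercised from a small client. This sketch pings procedure 0 (the NULL procedure) of an illustrative program number through the portmapper's remote-call service; it assumes a portmapper is listening on 127.0.0.1, and note that pmap_rmtcall() fills in the pmapper port itself, so sin_port may be left zero.

#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <rpc/rpc.h>
#include <rpc/pmap_clnt.h>

int
main(void)
{
	struct sockaddr_in addr;
	struct timeval tout = { 5, 0 };
	u_long port = 0;
	enum clnt_stat stat;

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
	/* sin_port stays zero: pmap_rmtcall() targets PMAPPORT itself. */

	/* Ping procedure 0 of an illustrative program number. */
	stat = pmap_rmtcall(&addr, 100000, 2, 0,
	    (xdrproc_t)xdr_void, NULL, (xdrproc_t)xdr_void, NULL,
	    tout, &port);
	printf("status %d, responding port %lu\n", (int)stat, port);
	return (stat == RPC_SUCCESS ? 0 : 1);
}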
*/ enum clnt_stat -pmap_rmtcall(addr, prog, vers, proc, xdrargs, argsp, xdrres, resp, tout, - port_ptr) - struct sockaddr_in *addr; - u_long prog, vers, proc; - xdrproc_t xdrargs, xdrres; - caddr_t argsp, resp; - struct timeval tout; - u_long *port_ptr; +pmap_rmtcall(struct sockaddr_in *addr, u_long prog, u_long vers, u_long proc, + xdrproc_t xdrargs, caddr_t argsp, xdrproc_t xdrres, caddr_t resp, + struct timeval tout, u_long *port_ptr) { int sock = -1; CLIENT *client; struct rmtcallargs a; struct rmtcallres r; enum clnt_stat stat; assert(addr != NULL); assert(port_ptr != NULL); addr->sin_port = htons(PMAPPORT); client = clntudp_create(addr, PMAPPROG, PMAPVERS, timeout, &sock); if (client != NULL) { a.prog = prog; a.vers = vers; a.proc = proc; a.args_ptr = argsp; a.xdr_args = xdrargs; r.port_ptr = port_ptr; r.results_ptr = resp; r.xdr_results = xdrres; stat = CLNT_CALL(client, (rpcproc_t)PMAPPROC_CALLIT, (xdrproc_t)xdr_rmtcall_args, &a, (xdrproc_t)xdr_rmtcallres, &r, tout); CLNT_DESTROY(client); } else { stat = RPC_FAILED; } addr->sin_port = 0; return (stat); } /* * XDR remote call arguments * written for XDR_ENCODE direction only */ bool_t -xdr_rmtcall_args(xdrs, cap) - XDR *xdrs; - struct rmtcallargs *cap; +xdr_rmtcall_args(XDR *xdrs, struct rmtcallargs *cap) { u_int lenposition, argposition, position; assert(xdrs != NULL); assert(cap != NULL); if (xdr_u_long(xdrs, &(cap->prog)) && xdr_u_long(xdrs, &(cap->vers)) && xdr_u_long(xdrs, &(cap->proc))) { lenposition = XDR_GETPOS(xdrs); if (! xdr_u_long(xdrs, &(cap->arglen))) return (FALSE); argposition = XDR_GETPOS(xdrs); if (! (*(cap->xdr_args))(xdrs, cap->args_ptr)) return (FALSE); position = XDR_GETPOS(xdrs); cap->arglen = (u_long)position - (u_long)argposition; XDR_SETPOS(xdrs, lenposition); if (! xdr_u_long(xdrs, &(cap->arglen))) return (FALSE); XDR_SETPOS(xdrs, position); return (TRUE); } return (FALSE); } /* * XDR remote call results * written for XDR_DECODE direction only */ bool_t -xdr_rmtcallres(xdrs, crp) - XDR *xdrs; - struct rmtcallres *crp; +xdr_rmtcallres(XDR *xdrs, struct rmtcallres *crp) { caddr_t port_ptr; assert(xdrs != NULL); assert(crp != NULL); port_ptr = (caddr_t)(void *)crp->port_ptr; if (xdr_reference(xdrs, &port_ptr, sizeof (u_long), (xdrproc_t)xdr_u_long) && xdr_u_long(xdrs, &crp->resultslen)) { crp->port_ptr = (u_long *)(void *)port_ptr; return ((*(crp->xdr_results))(xdrs, crp->results_ptr)); } return (FALSE); } Index: projects/clang370-import/lib/libc/rpc/rpc_callmsg.c =================================================================== --- projects/clang370-import/lib/libc/rpc/rpc_callmsg.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/rpc_callmsg.c (revision 288126) @@ -1,206 +1,204 @@ /* $NetBSD: rpc_callmsg.c,v 1.16 2000/07/14 08:40:42 fvdl Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
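xdr_rmtcall_args() above uses a position trick worth calling out: it writes a placeholder length, serializes the arguments, measures them with XDR_GETPOS(), then seeks back with XDR_SETPOS() to patch in the real length. Below is a minimal sketch of the same pattern, with an assumed string payload standing in for the marshalled arguments.

#include <stdio.h>
#include <rpc/rpc.h>

/*
 * Encode a counted record whose length is known only after the body
 * has been serialized, using the same get/set-position back-patching
 * as xdr_rmtcall_args().
 */
static bool_t
encode_counted(XDR *xdrs, char *payload)
{
	u_int lenpos, datapos, endpos;
	u_long len = 0;

	lenpos = xdr_getpos(xdrs);
	if (!xdr_u_long(xdrs, &len))		/* placeholder length */
		return (FALSE);
	datapos = xdr_getpos(xdrs);
	if (!xdr_string(xdrs, &payload, 1024))	/* the variable body */
		return (FALSE);
	endpos = xdr_getpos(xdrs);
	len = endpos - datapos;			/* real length, now known */
	if (!xdr_setpos(xdrs, lenpos))
		return (FALSE);
	if (!xdr_u_long(xdrs, &len))		/* back-patch it */
		return (FALSE);
	return (xdr_setpos(xdrs, endpos));
}

int
main(void)
{
	char buf[256];
	char msg[] = "hello, portmapper";
	XDR xdrs;

	xdrmem_create(&xdrs, buf, sizeof(buf), XDR_ENCODE);
	if (!encode_counted(&xdrs, msg))
		return (1);
	printf("stream position: %u\n", xdr_getpos(&xdrs));
	xdr_destroy(&xdrs);
	return (0);
}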
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)rpc_callmsg.c 1.4 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)rpc_callmsg.c 2.1 88/07/29 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * rpc_callmsg.c * * Copyright (C) 1984, Sun Microsystems, Inc. * */ #include "namespace.h" #include #include #include #include #include "un-namespace.h" /* * XDR a call message */ bool_t -xdr_callmsg(xdrs, cmsg) - XDR *xdrs; - struct rpc_msg *cmsg; +xdr_callmsg(XDR *xdrs, struct rpc_msg *cmsg) { enum msg_type *prm_direction; int32_t *buf; struct opaque_auth *oa; assert(xdrs != NULL); assert(cmsg != NULL); if (xdrs->x_op == XDR_ENCODE) { if (cmsg->rm_call.cb_cred.oa_length > MAX_AUTH_BYTES) { return (FALSE); } if (cmsg->rm_call.cb_verf.oa_length > MAX_AUTH_BYTES) { return (FALSE); } buf = XDR_INLINE(xdrs, 8 * BYTES_PER_XDR_UNIT + RNDUP(cmsg->rm_call.cb_cred.oa_length) + 2 * BYTES_PER_XDR_UNIT + RNDUP(cmsg->rm_call.cb_verf.oa_length)); if (buf != NULL) { IXDR_PUT_INT32(buf, cmsg->rm_xid); IXDR_PUT_ENUM(buf, cmsg->rm_direction); if (cmsg->rm_direction != CALL) { return (FALSE); } IXDR_PUT_INT32(buf, cmsg->rm_call.cb_rpcvers); if (cmsg->rm_call.cb_rpcvers != RPC_MSG_VERSION) { return (FALSE); } IXDR_PUT_INT32(buf, cmsg->rm_call.cb_prog); IXDR_PUT_INT32(buf, cmsg->rm_call.cb_vers); IXDR_PUT_INT32(buf, cmsg->rm_call.cb_proc); oa = &cmsg->rm_call.cb_cred; IXDR_PUT_ENUM(buf, oa->oa_flavor); IXDR_PUT_INT32(buf, oa->oa_length); if (oa->oa_length) { memmove(buf, oa->oa_base, oa->oa_length); buf += RNDUP(oa->oa_length) / sizeof (int32_t); } oa = &cmsg->rm_call.cb_verf; IXDR_PUT_ENUM(buf, oa->oa_flavor); IXDR_PUT_INT32(buf, oa->oa_length); if (oa->oa_length) { memmove(buf, oa->oa_base, oa->oa_length); /* no real need.... 
buf += RNDUP(oa->oa_length) / sizeof (int32_t); */ } return (TRUE); } } if (xdrs->x_op == XDR_DECODE) { buf = XDR_INLINE(xdrs, 8 * BYTES_PER_XDR_UNIT); if (buf != NULL) { cmsg->rm_xid = IXDR_GET_U_INT32(buf); cmsg->rm_direction = IXDR_GET_ENUM(buf, enum msg_type); if (cmsg->rm_direction != CALL) { return (FALSE); } cmsg->rm_call.cb_rpcvers = IXDR_GET_U_INT32(buf); if (cmsg->rm_call.cb_rpcvers != RPC_MSG_VERSION) { return (FALSE); } cmsg->rm_call.cb_prog = IXDR_GET_U_INT32(buf); cmsg->rm_call.cb_vers = IXDR_GET_U_INT32(buf); cmsg->rm_call.cb_proc = IXDR_GET_U_INT32(buf); oa = &cmsg->rm_call.cb_cred; oa->oa_flavor = IXDR_GET_ENUM(buf, enum_t); oa->oa_length = (u_int)IXDR_GET_U_INT32(buf); if (oa->oa_length) { if (oa->oa_length > MAX_AUTH_BYTES) { return (FALSE); } if (oa->oa_base == NULL) { oa->oa_base = (caddr_t) mem_alloc(oa->oa_length); if (oa->oa_base == NULL) return (FALSE); } buf = XDR_INLINE(xdrs, RNDUP(oa->oa_length)); if (buf == NULL) { if (xdr_opaque(xdrs, oa->oa_base, oa->oa_length) == FALSE) { return (FALSE); } } else { memmove(oa->oa_base, buf, oa->oa_length); /* no real need.... buf += RNDUP(oa->oa_length) / sizeof (int32_t); */ } } oa = &cmsg->rm_call.cb_verf; buf = XDR_INLINE(xdrs, 2 * BYTES_PER_XDR_UNIT); if (buf == NULL) { if (xdr_enum(xdrs, &oa->oa_flavor) == FALSE || xdr_u_int(xdrs, &oa->oa_length) == FALSE) { return (FALSE); } } else { oa->oa_flavor = IXDR_GET_ENUM(buf, enum_t); oa->oa_length = (u_int)IXDR_GET_U_INT32(buf); } if (oa->oa_length) { if (oa->oa_length > MAX_AUTH_BYTES) { return (FALSE); } if (oa->oa_base == NULL) { oa->oa_base = (caddr_t) mem_alloc(oa->oa_length); if (oa->oa_base == NULL) return (FALSE); } buf = XDR_INLINE(xdrs, RNDUP(oa->oa_length)); if (buf == NULL) { if (xdr_opaque(xdrs, oa->oa_base, oa->oa_length) == FALSE) { return (FALSE); } } else { memmove(oa->oa_base, buf, oa->oa_length); /* no real need... buf += RNDUP(oa->oa_length) / sizeof (int32_t); */ } } return (TRUE); } } prm_direction = &cmsg->rm_direction; if ( xdr_u_int32_t(xdrs, &(cmsg->rm_xid)) && xdr_enum(xdrs, (enum_t *) prm_direction) && (cmsg->rm_direction == CALL) && xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_rpcvers)) && (cmsg->rm_call.cb_rpcvers == RPC_MSG_VERSION) && xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_prog)) && xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_vers)) && xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_proc)) && xdr_opaque_auth(xdrs, &(cmsg->rm_call.cb_cred)) ) return (xdr_opaque_auth(xdrs, &(cmsg->rm_call.cb_verf))); return (FALSE); } Index: projects/clang370-import/lib/libc/rpc/rpc_generic.c =================================================================== --- projects/clang370-import/lib/libc/rpc/rpc_generic.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/rpc_generic.c (revision 288126) @@ -1,846 +1,837 @@ /* $NetBSD: rpc_generic.c,v 1.4 2000/09/28 09:07:04 kleink Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. 
nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1986-1991 by Sun Microsystems Inc. */ /* #pragma ident "@(#)rpc_generic.c 1.17 94/04/24 SMI" */ #include __FBSDID("$FreeBSD$"); /* * rpc_generic.c, Miscl routines for RPC. * */ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "rpc_com.h" #include "mt_misc.h" struct handle { NCONF_HANDLE *nhandle; int nflag; /* Whether NETPATH or NETCONFIG */ int nettype; }; static const struct _rpcnettype { const char *name; const int type; } _rpctypelist[] = { { "netpath", _RPC_NETPATH }, { "visible", _RPC_VISIBLE }, { "circuit_v", _RPC_CIRCUIT_V }, { "datagram_v", _RPC_DATAGRAM_V }, { "circuit_n", _RPC_CIRCUIT_N }, { "datagram_n", _RPC_DATAGRAM_N }, { "tcp", _RPC_TCP }, { "udp", _RPC_UDP }, { 0, _RPC_NONE } }; struct netid_af { const char *netid; int af; int protocol; }; static const struct netid_af na_cvt[] = { { "udp", AF_INET, IPPROTO_UDP }, { "tcp", AF_INET, IPPROTO_TCP }, #ifdef INET6 { "udp6", AF_INET6, IPPROTO_UDP }, { "tcp6", AF_INET6, IPPROTO_TCP }, #endif { "local", AF_LOCAL, 0 } }; #if 0 static char *strlocase(char *); #endif static int getnettype(const char *); /* * Cache the result of getrlimit(), so we don't have to do an * expensive call every time. */ int -__rpc_dtbsize() +__rpc_dtbsize(void) { static int tbsize; struct rlimit rl; if (tbsize) { return (tbsize); } if (getrlimit(RLIMIT_NOFILE, &rl) == 0) { return (tbsize = (int)rl.rlim_max); } /* * Something wrong. I'll try to save face by returning a * pessimistic number. */ return (32); } /* * Find the appropriate buffer size + * + * size - Size requested */ u_int /*ARGSUSED*/ -__rpc_get_t_size(af, proto, size) - int af, proto; - int size; /* Size requested */ +__rpc_get_t_size(int af, int proto, int size) { int maxsize, defsize; maxsize = 256 * 1024; /* XXX */ switch (proto) { case IPPROTO_TCP: defsize = 64 * 1024; /* XXX */ break; case IPPROTO_UDP: defsize = UDPMSGSIZE; break; default: defsize = RPC_MAXDATASIZE; break; } if (size == 0) return defsize; /* Check whether the value is within the upper max limit */ return (size > maxsize ? 
(u_int)maxsize : (u_int)size); } /* * Find the appropriate address buffer size */ u_int -__rpc_get_a_size(af) - int af; +__rpc_get_a_size(int af) { switch (af) { case AF_INET: return sizeof (struct sockaddr_in); #ifdef INET6 case AF_INET6: return sizeof (struct sockaddr_in6); #endif case AF_LOCAL: return sizeof (struct sockaddr_un); default: break; } return ((u_int)RPC_MAXADDRSIZE); } #if 0 static char * -strlocase(p) - char *p; +strlocase(char *p) { char *t = p; for (; *p; p++) if (isupper(*p)) *p = tolower(*p); return (t); } #endif /* * Returns the type of the network as defined in * If nettype is NULL, it defaults to NETPATH. */ static int -getnettype(nettype) - const char *nettype; +getnettype(const char *nettype) { int i; if ((nettype == NULL) || (nettype[0] == 0)) { return (_RPC_NETPATH); /* Default */ } #if 0 nettype = strlocase(nettype); #endif for (i = 0; _rpctypelist[i].name; i++) if (strcasecmp(nettype, _rpctypelist[i].name) == 0) { return (_rpctypelist[i].type); } return (_rpctypelist[i].type); } static thread_key_t tcp_key, udp_key; static once_t keys_once = ONCE_INITIALIZER; static int tcp_key_error, udp_key_error; static void keys_init(void) { tcp_key_error = thr_keycreate(&tcp_key, free); udp_key_error = thr_keycreate(&udp_key, free); } /* * For the given nettype (tcp or udp only), return the first structure found. * This should be freed by calling freenetconfigent() */ struct netconfig * -__rpc_getconfip(nettype) - const char *nettype; +__rpc_getconfip(const char *nettype) { char *netid; char *netid_tcp = (char *) NULL; char *netid_udp = (char *) NULL; static char *netid_tcp_main; static char *netid_udp_main; struct netconfig *dummy; int main_thread; if ((main_thread = thr_main())) { netid_udp = netid_udp_main; netid_tcp = netid_tcp_main; } else { if (thr_once(&keys_once, keys_init) != 0 || tcp_key_error != 0 || udp_key_error != 0) return (NULL); netid_tcp = (char *)thr_getspecific(tcp_key); netid_udp = (char *)thr_getspecific(udp_key); } if (!netid_udp && !netid_tcp) { struct netconfig *nconf; void *confighandle; if (!(confighandle = setnetconfig())) { syslog (LOG_ERR, "rpc: failed to open " NETCONFIG); return (NULL); } while ((nconf = getnetconfig(confighandle)) != NULL) { if (strcmp(nconf->nc_protofmly, NC_INET) == 0) { if (strcmp(nconf->nc_proto, NC_TCP) == 0 && netid_tcp == NULL) { netid_tcp = strdup(nconf->nc_netid); if (main_thread) netid_tcp_main = netid_tcp; else thr_setspecific(tcp_key, (void *) netid_tcp); } else if (strcmp(nconf->nc_proto, NC_UDP) == 0 && netid_udp == NULL) { netid_udp = strdup(nconf->nc_netid); if (main_thread) netid_udp_main = netid_udp; else thr_setspecific(udp_key, (void *) netid_udp); } } } endnetconfig(confighandle); } if (strcmp(nettype, "udp") == 0) netid = netid_udp; else if (strcmp(nettype, "tcp") == 0) netid = netid_tcp; else { return (NULL); } if ((netid == NULL) || (netid[0] == 0)) { return (NULL); } dummy = getnetconfigent(netid); return (dummy); } /* * Returns the type of the nettype, which should then be used with * __rpc_getconf(). 
*/ void * -__rpc_setconf(nettype) - const char *nettype; +__rpc_setconf(const char *nettype) { struct handle *handle; handle = (struct handle *) malloc(sizeof (struct handle)); if (handle == NULL) { return (NULL); } switch (handle->nettype = getnettype(nettype)) { case _RPC_NETPATH: case _RPC_CIRCUIT_N: case _RPC_DATAGRAM_N: if (!(handle->nhandle = setnetpath())) goto failed; handle->nflag = TRUE; break; case _RPC_VISIBLE: case _RPC_CIRCUIT_V: case _RPC_DATAGRAM_V: case _RPC_TCP: case _RPC_UDP: if (!(handle->nhandle = setnetconfig())) { syslog (LOG_ERR, "rpc: failed to open " NETCONFIG); goto failed; } handle->nflag = FALSE; break; default: goto failed; } return (handle); failed: free(handle); return (NULL); } /* * Returns the next netconfig struct for the given "net" type. * __rpc_setconf() should have been called previously. */ struct netconfig * -__rpc_getconf(vhandle) - void *vhandle; +__rpc_getconf(void *vhandle) { struct handle *handle; struct netconfig *nconf; handle = (struct handle *)vhandle; if (handle == NULL) { return (NULL); } for (;;) { if (handle->nflag) nconf = getnetpath(handle->nhandle); else nconf = getnetconfig(handle->nhandle); if (nconf == NULL) break; if ((nconf->nc_semantics != NC_TPI_CLTS) && (nconf->nc_semantics != NC_TPI_COTS) && (nconf->nc_semantics != NC_TPI_COTS_ORD)) continue; switch (handle->nettype) { case _RPC_VISIBLE: if (!(nconf->nc_flag & NC_VISIBLE)) continue; /* FALLTHROUGH */ case _RPC_NETPATH: /* Be happy */ break; case _RPC_CIRCUIT_V: if (!(nconf->nc_flag & NC_VISIBLE)) continue; /* FALLTHROUGH */ case _RPC_CIRCUIT_N: if ((nconf->nc_semantics != NC_TPI_COTS) && (nconf->nc_semantics != NC_TPI_COTS_ORD)) continue; break; case _RPC_DATAGRAM_V: if (!(nconf->nc_flag & NC_VISIBLE)) continue; /* FALLTHROUGH */ case _RPC_DATAGRAM_N: if (nconf->nc_semantics != NC_TPI_CLTS) continue; break; case _RPC_TCP: if (((nconf->nc_semantics != NC_TPI_COTS) && (nconf->nc_semantics != NC_TPI_COTS_ORD)) || (strcmp(nconf->nc_protofmly, NC_INET) #ifdef INET6 && strcmp(nconf->nc_protofmly, NC_INET6)) #else ) #endif || strcmp(nconf->nc_proto, NC_TCP)) continue; break; case _RPC_UDP: if ((nconf->nc_semantics != NC_TPI_CLTS) || (strcmp(nconf->nc_protofmly, NC_INET) #ifdef INET6 && strcmp(nconf->nc_protofmly, NC_INET6)) #else ) #endif || strcmp(nconf->nc_proto, NC_UDP)) continue; break; } break; } return (nconf); } void -__rpc_endconf(vhandle) - void * vhandle; +__rpc_endconf(void *vhandle) { struct handle *handle; handle = (struct handle *) vhandle; if (handle == NULL) { return; } if (handle->nflag) { endnetpath(handle->nhandle); } else { endnetconfig(handle->nhandle); } free(handle); } /* * Used to ping the NULL procedure for clnt handle. * Returns NULL if fails, else a non-NULL pointer. */ void * -rpc_nullproc(clnt) - CLIENT *clnt; +rpc_nullproc(CLIENT *clnt) { struct timeval TIMEOUT = {25, 0}; if (clnt_call(clnt, NULLPROC, (xdrproc_t) xdr_void, NULL, (xdrproc_t) xdr_void, NULL, TIMEOUT) != RPC_SUCCESS) { return (NULL); } return ((void *) clnt); } /* * Try all possible transports until * one succeeds in finding the netconf for the given fd. 
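__rpc_setconf() and __rpc_getconf() above are private wrappers around the netconfig database, so the same selection can only be sketched with the public getnetconfig(3) interface. The example below lists the datagram transports a "udp" nettype would match, assuming a standard /etc/netconfig.

#include <stdio.h>
#include <string.h>
#include <netconfig.h>

int
main(void)
{
	void *handle;
	struct netconfig *nconf;

	if ((handle = setnetconfig()) == NULL) {
		fprintf(stderr, "cannot open netconfig\n");
		return (1);
	}
	while ((nconf = getnetconfig(handle)) != NULL) {
		/* Same filter the private iterator applies for "udp". */
		if (nconf->nc_semantics == NC_TPI_CLTS &&
		    strcmp(nconf->nc_proto, NC_UDP) == 0)
			printf("datagram transport: %s (family %s)\n",
			    nconf->nc_netid, nconf->nc_protofmly);
	}
	endnetconfig(handle);
	return (0);
}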
*/ struct netconfig * -__rpcgettp(fd) - int fd; +__rpcgettp(int fd) { const char *netid; struct __rpc_sockinfo si; if (!__rpc_fd2sockinfo(fd, &si)) return NULL; if (!__rpc_sockinfo2netid(&si, &netid)) return NULL; /*LINTED const castaway*/ return getnetconfigent((char *)netid); } int __rpc_fd2sockinfo(int fd, struct __rpc_sockinfo *sip) { socklen_t len; int type, proto; struct sockaddr_storage ss; len = sizeof ss; if (_getsockname(fd, (struct sockaddr *)(void *)&ss, &len) < 0) return 0; sip->si_alen = len; len = sizeof type; if (_getsockopt(fd, SOL_SOCKET, SO_TYPE, &type, &len) < 0) return 0; /* XXX */ if (ss.ss_family != AF_LOCAL) { if (type == SOCK_STREAM) proto = IPPROTO_TCP; else if (type == SOCK_DGRAM) proto = IPPROTO_UDP; else return 0; } else proto = 0; sip->si_af = ss.ss_family; sip->si_proto = proto; sip->si_socktype = type; return 1; } /* * Linear search, but the number of entries is small. */ int __rpc_nconf2sockinfo(const struct netconfig *nconf, struct __rpc_sockinfo *sip) { int i; for (i = 0; i < (sizeof na_cvt) / (sizeof (struct netid_af)); i++) if (strcmp(na_cvt[i].netid, nconf->nc_netid) == 0 || ( strcmp(nconf->nc_netid, "unix") == 0 && strcmp(na_cvt[i].netid, "local") == 0)) { sip->si_af = na_cvt[i].af; sip->si_proto = na_cvt[i].protocol; sip->si_socktype = __rpc_seman2socktype((int)nconf->nc_semantics); if (sip->si_socktype == -1) return 0; sip->si_alen = __rpc_get_a_size(sip->si_af); return 1; } return 0; } int __rpc_nconf2fd(const struct netconfig *nconf) { struct __rpc_sockinfo si; if (!__rpc_nconf2sockinfo(nconf, &si)) return 0; return _socket(si.si_af, si.si_socktype, si.si_proto); } int __rpc_sockinfo2netid(struct __rpc_sockinfo *sip, const char **netid) { int i; struct netconfig *nconf; nconf = getnetconfigent("local"); for (i = 0; i < (sizeof na_cvt) / (sizeof (struct netid_af)); i++) { if (na_cvt[i].af == sip->si_af && na_cvt[i].protocol == sip->si_proto) { if (strcmp(na_cvt[i].netid, "local") == 0 && nconf == NULL) { if (netid) *netid = "unix"; } else { if (netid) *netid = na_cvt[i].netid; } if (nconf != NULL) freenetconfigent(nconf); return 1; } } if (nconf != NULL) freenetconfigent(nconf); return 0; } char * taddr2uaddr(const struct netconfig *nconf, const struct netbuf *nbuf) { struct __rpc_sockinfo si; if (!__rpc_nconf2sockinfo(nconf, &si)) return NULL; return __rpc_taddr2uaddr_af(si.si_af, nbuf); } struct netbuf * uaddr2taddr(const struct netconfig *nconf, const char *uaddr) { struct __rpc_sockinfo si; if (!__rpc_nconf2sockinfo(nconf, &si)) return NULL; return __rpc_uaddr2taddr_af(si.si_af, uaddr); } char * __rpc_taddr2uaddr_af(int af, const struct netbuf *nbuf) { char *ret; struct sockaddr_in *sin; struct sockaddr_un *sun; char namebuf[INET_ADDRSTRLEN]; #ifdef INET6 struct sockaddr_in6 *sin6; char namebuf6[INET6_ADDRSTRLEN]; #endif u_int16_t port; switch (af) { case AF_INET: sin = nbuf->buf; if (inet_ntop(af, &sin->sin_addr, namebuf, sizeof namebuf) == NULL) return NULL; port = ntohs(sin->sin_port); if (asprintf(&ret, "%s.%u.%u", namebuf, ((u_int32_t)port) >> 8, port & 0xff) < 0) return NULL; break; #ifdef INET6 case AF_INET6: sin6 = nbuf->buf; if (inet_ntop(af, &sin6->sin6_addr, namebuf6, sizeof namebuf6) == NULL) return NULL; port = ntohs(sin6->sin6_port); if (asprintf(&ret, "%s.%u.%u", namebuf6, ((u_int32_t)port) >> 8, port & 0xff) < 0) return NULL; break; #endif case AF_LOCAL: sun = nbuf->buf; if (asprintf(&ret, "%.*s", (int)(sun->sun_len - offsetof(struct sockaddr_un, sun_path)), sun->sun_path) < 0) return (NULL); break; default: return NULL; } 
return ret; } struct netbuf * __rpc_uaddr2taddr_af(int af, const char *uaddr) { struct netbuf *ret = NULL; char *addrstr, *p; unsigned port, portlo, porthi; struct sockaddr_in *sin; #ifdef INET6 struct sockaddr_in6 *sin6; #endif struct sockaddr_un *sun; port = 0; sin = NULL; addrstr = strdup(uaddr); if (addrstr == NULL) return NULL; /* * AF_LOCAL addresses are expected to be absolute * pathnames, anything else will be AF_INET or AF_INET6. */ if (*addrstr != '/') { p = strrchr(addrstr, '.'); if (p == NULL) goto out; portlo = (unsigned)atoi(p + 1); *p = '\0'; p = strrchr(addrstr, '.'); if (p == NULL) goto out; porthi = (unsigned)atoi(p + 1); *p = '\0'; port = (porthi << 8) | portlo; } ret = (struct netbuf *)malloc(sizeof *ret); if (ret == NULL) goto out; switch (af) { case AF_INET: sin = (struct sockaddr_in *)malloc(sizeof *sin); if (sin == NULL) goto out; memset(sin, 0, sizeof *sin); sin->sin_family = AF_INET; sin->sin_port = htons(port); if (inet_pton(AF_INET, addrstr, &sin->sin_addr) <= 0) { free(sin); free(ret); ret = NULL; goto out; } sin->sin_len = ret->maxlen = ret->len = sizeof *sin; ret->buf = sin; break; #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)malloc(sizeof *sin6); if (sin6 == NULL) goto out; memset(sin6, 0, sizeof *sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(port); if (inet_pton(AF_INET6, addrstr, &sin6->sin6_addr) <= 0) { free(sin6); free(ret); ret = NULL; goto out; } sin6->sin6_len = ret->maxlen = ret->len = sizeof *sin6; ret->buf = sin6; break; #endif case AF_LOCAL: sun = (struct sockaddr_un *)malloc(sizeof *sun); if (sun == NULL) goto out; memset(sun, 0, sizeof *sun); sun->sun_family = AF_LOCAL; strncpy(sun->sun_path, addrstr, sizeof(sun->sun_path) - 1); ret->len = ret->maxlen = sun->sun_len = SUN_LEN(sun); ret->buf = sun; break; default: break; } out: free(addrstr); return ret; } int __rpc_seman2socktype(int semantics) { switch (semantics) { case NC_TPI_CLTS: return SOCK_DGRAM; case NC_TPI_COTS_ORD: return SOCK_STREAM; case NC_TPI_RAW: return SOCK_RAW; default: break; } return -1; } int __rpc_socktype2seman(int socktype) { switch (socktype) { case SOCK_DGRAM: return NC_TPI_CLTS; case SOCK_STREAM: return NC_TPI_COTS_ORD; case SOCK_RAW: return NC_TPI_RAW; default: break; } return -1; } /* * XXXX - IPv6 scope IDs can't be handled in universal addresses. * Here, we compare the original server address to that of the RPC * service we just received back from a call to rpcbind on the remote * machine. If they are both "link local" or "site local", copy * the scope id of the server address over to the service address. 
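__rpc_uaddr2taddr_af() above recovers the port from a universal address by peeling off the last two dot-separated components. Here is that arithmetic in isolation, with an assumed example address (a sketch, not library code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	char uaddr[] = "127.0.0.1.8.1";	/* port 8 * 256 + 1 = 2049 */
	char *p;
	unsigned porthi, portlo;

	p = strrchr(uaddr, '.');	/* low byte comes last */
	portlo = (unsigned)atoi(p + 1);
	*p = '\0';
	p = strrchr(uaddr, '.');	/* then the high byte */
	porthi = (unsigned)atoi(p + 1);
	*p = '\0';

	printf("host %s, port %u\n", uaddr, (porthi << 8) | portlo);
	return (0);
}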
*/ int __rpc_fixup_addr(struct netbuf *new, const struct netbuf *svc) { #ifdef INET6 struct sockaddr *sa_new, *sa_svc; struct sockaddr_in6 *sin6_new, *sin6_svc; sa_svc = (struct sockaddr *)svc->buf; sa_new = (struct sockaddr *)new->buf; if (sa_new->sa_family == sa_svc->sa_family && sa_new->sa_family == AF_INET6) { sin6_new = (struct sockaddr_in6 *)new->buf; sin6_svc = (struct sockaddr_in6 *)svc->buf; if ((IN6_IS_ADDR_LINKLOCAL(&sin6_new->sin6_addr) && IN6_IS_ADDR_LINKLOCAL(&sin6_svc->sin6_addr)) || (IN6_IS_ADDR_SITELOCAL(&sin6_new->sin6_addr) && IN6_IS_ADDR_SITELOCAL(&sin6_svc->sin6_addr))) { sin6_new->sin6_scope_id = sin6_svc->sin6_scope_id; } } #endif return 1; } int __rpc_sockisbound(int fd) { struct sockaddr_storage ss; socklen_t slen; slen = sizeof (struct sockaddr_storage); if (_getsockname(fd, (struct sockaddr *)(void *)&ss, &slen) < 0) return 0; switch (ss.ss_family) { case AF_INET: return (((struct sockaddr_in *) (void *)&ss)->sin_port != 0); #ifdef INET6 case AF_INET6: return (((struct sockaddr_in6 *) (void *)&ss)->sin6_port != 0); #endif case AF_LOCAL: /* XXX check this */ return (((struct sockaddr_un *) (void *)&ss)->sun_path[0] != '\0'); default: break; } return 0; } Index: projects/clang370-import/lib/libc/rpc/rpc_prot.c =================================================================== --- projects/clang370-import/lib/libc/rpc/rpc_prot.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/rpc_prot.c (revision 288126) @@ -1,357 +1,353 @@ /* $NetBSD: rpc_prot.c,v 1.16 2000/06/02 23:11:13 fvdl Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)rpc_prot.c 1.36 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)rpc_prot.c 2.3 88/08/07 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * rpc_prot.c * * Copyright (C) 1984, Sun Microsystems, Inc. * * This set of routines implements the rpc message definition, * its serializer and some common rpc utility routines. 
* The routines are meant for various implementations of rpc - * they are NOT for the rpc client or rpc service implementations! * Because authentication stuff is easy and is part of rpc, the opaque * routines are also in this program. */ #include "namespace.h" #include #include #include #include "un-namespace.h" static void accepted(enum accept_stat, struct rpc_err *); static void rejected(enum reject_stat, struct rpc_err *); /* * * * * * * * * * * * * * XDR Authentication * * * * * * * * * * * */ extern struct opaque_auth _null_auth; /* * XDR an opaque authentication struct * (see auth.h) */ bool_t xdr_opaque_auth(XDR *xdrs, struct opaque_auth *ap) { assert(xdrs != NULL); assert(ap != NULL); if (xdr_enum(xdrs, &(ap->oa_flavor))) return (xdr_bytes(xdrs, &ap->oa_base, &ap->oa_length, MAX_AUTH_BYTES)); return (FALSE); } /* * XDR a DES block */ bool_t xdr_des_block(XDR *xdrs, des_block *blkp) { assert(xdrs != NULL); assert(blkp != NULL); return (xdr_opaque(xdrs, (caddr_t)(void *)blkp, sizeof(des_block))); } /* * * * * * * * * * * * * * XDR RPC MESSAGE * * * * * * * * * * * * * * * */ /* * XDR the MSG_ACCEPTED part of a reply message union */ bool_t xdr_accepted_reply(XDR *xdrs, struct accepted_reply *ar) { enum accept_stat *par_stat; assert(xdrs != NULL); assert(ar != NULL); par_stat = &ar->ar_stat; /* personalized union, rather than calling xdr_union */ if (! xdr_opaque_auth(xdrs, &(ar->ar_verf))) return (FALSE); if (! xdr_enum(xdrs, (enum_t *) par_stat)) return (FALSE); switch (ar->ar_stat) { case SUCCESS: return ((*(ar->ar_results.proc))(xdrs, ar->ar_results.where)); case PROG_MISMATCH: if (! xdr_u_int32_t(xdrs, &(ar->ar_vers.low))) return (FALSE); return (xdr_u_int32_t(xdrs, &(ar->ar_vers.high))); case GARBAGE_ARGS: case SYSTEM_ERR: case PROC_UNAVAIL: case PROG_UNAVAIL: break; } return (TRUE); /* TRUE => open ended set of problems */ } /* * XDR the MSG_DENIED part of a reply message union */ bool_t xdr_rejected_reply(XDR *xdrs, struct rejected_reply *rr) { enum reject_stat *prj_stat; enum auth_stat *prj_why; assert(xdrs != NULL); assert(rr != NULL); prj_stat = &rr->rj_stat; /* personalized union, rather than calling xdr_union */ if (! xdr_enum(xdrs, (enum_t *) prj_stat)) return (FALSE); switch (rr->rj_stat) { case RPC_MISMATCH: if (! xdr_u_int32_t(xdrs, &(rr->rj_vers.low))) return (FALSE); return (xdr_u_int32_t(xdrs, &(rr->rj_vers.high))); case AUTH_ERROR: prj_why = &rr->rj_why; return (xdr_enum(xdrs, (enum_t *) prj_why)); } /* NOTREACHED */ assert(0); return (FALSE); } static const struct xdr_discrim reply_dscrm[3] = { { (int)MSG_ACCEPTED, (xdrproc_t)xdr_accepted_reply }, { (int)MSG_DENIED, (xdrproc_t)xdr_rejected_reply }, { __dontcare__, NULL_xdrproc_t } }; /* * XDR a reply message */ bool_t xdr_replymsg(XDR *xdrs, struct rpc_msg *rmsg) { enum msg_type *prm_direction; enum reply_stat *prp_stat; assert(xdrs != NULL); assert(rmsg != NULL); prm_direction = &rmsg->rm_direction; prp_stat = &rmsg->rm_reply.rp_stat; if ( xdr_u_int32_t(xdrs, &(rmsg->rm_xid)) && xdr_enum(xdrs, (enum_t *) prm_direction) && (rmsg->rm_direction == REPLY) ) return (xdr_union(xdrs, (enum_t *) prp_stat, (caddr_t)(void *)&(rmsg->rm_reply.ru), reply_dscrm, NULL_xdrproc_t)); return (FALSE); } /* * Serializes the "static part" of a call message header. * The fields include: rm_xid, rm_direction, rpcvers, prog, and vers. * The rm_xid is not really static, but the user can easily munge on the fly. 
*/ bool_t xdr_callhdr(XDR *xdrs, struct rpc_msg *cmsg) { enum msg_type *prm_direction; assert(xdrs != NULL); assert(cmsg != NULL); prm_direction = &cmsg->rm_direction; cmsg->rm_direction = CALL; cmsg->rm_call.cb_rpcvers = RPC_MSG_VERSION; if ( (xdrs->x_op == XDR_ENCODE) && xdr_u_int32_t(xdrs, &(cmsg->rm_xid)) && xdr_enum(xdrs, (enum_t *) prm_direction) && xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_rpcvers)) && xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_prog)) ) return (xdr_u_int32_t(xdrs, &(cmsg->rm_call.cb_vers))); return (FALSE); } /* ************************** Client utility routine ************* */ static void accepted(enum accept_stat acpt_stat, struct rpc_err *error) { assert(error != NULL); switch (acpt_stat) { case PROG_UNAVAIL: error->re_status = RPC_PROGUNAVAIL; return; case PROG_MISMATCH: error->re_status = RPC_PROGVERSMISMATCH; return; case PROC_UNAVAIL: error->re_status = RPC_PROCUNAVAIL; return; case GARBAGE_ARGS: error->re_status = RPC_CANTDECODEARGS; return; case SYSTEM_ERR: error->re_status = RPC_SYSTEMERROR; return; case SUCCESS: error->re_status = RPC_SUCCESS; return; } /* NOTREACHED */ /* something's wrong, but we don't know what ... */ error->re_status = RPC_FAILED; error->re_lb.s1 = (int32_t)MSG_ACCEPTED; error->re_lb.s2 = (int32_t)acpt_stat; } static void -rejected(rjct_stat, error) - enum reject_stat rjct_stat; - struct rpc_err *error; +rejected(enum reject_stat rjct_stat, struct rpc_err *error) { assert(error != NULL); switch (rjct_stat) { case RPC_MISMATCH: error->re_status = RPC_VERSMISMATCH; return; case AUTH_ERROR: error->re_status = RPC_AUTHERROR; return; } /* something's wrong, but we don't know what ... */ /* NOTREACHED */ error->re_status = RPC_FAILED; error->re_lb.s1 = (int32_t)MSG_DENIED; error->re_lb.s2 = (int32_t)rjct_stat; } /* * given a reply message, fills in the error */ void -_seterr_reply(msg, error) - struct rpc_msg *msg; - struct rpc_err *error; +_seterr_reply(struct rpc_msg *msg, struct rpc_err *error) { assert(msg != NULL); assert(error != NULL); /* optimized for normal, SUCCESSful case */ switch (msg->rm_reply.rp_stat) { case MSG_ACCEPTED: if (msg->acpted_rply.ar_stat == SUCCESS) { error->re_status = RPC_SUCCESS; return; } accepted(msg->acpted_rply.ar_stat, error); break; case MSG_DENIED: rejected(msg->rjcted_rply.rj_stat, error); break; default: error->re_status = RPC_FAILED; error->re_lb.s1 = (int32_t)(msg->rm_reply.rp_stat); break; } switch (error->re_status) { case RPC_VERSMISMATCH: error->re_vers.low = msg->rjcted_rply.rj_vers.low; error->re_vers.high = msg->rjcted_rply.rj_vers.high; break; case RPC_AUTHERROR: error->re_why = msg->rjcted_rply.rj_why; break; case RPC_PROGVERSMISMATCH: error->re_vers.low = msg->acpted_rply.ar_vers.low; error->re_vers.high = msg->acpted_rply.ar_vers.high; break; case RPC_FAILED: case RPC_SUCCESS: case RPC_PROGNOTREGISTERED: case RPC_PMAPFAILURE: case RPC_UNKNOWNPROTO: case RPC_UNKNOWNHOST: case RPC_SYSTEMERROR: case RPC_CANTDECODEARGS: case RPC_PROCUNAVAIL: case RPC_PROGUNAVAIL: case RPC_TIMEDOUT: case RPC_CANTRECV: case RPC_CANTSEND: case RPC_CANTDECODERES: case RPC_CANTENCODEARGS: default: break; } } Index: projects/clang370-import/lib/libc/rpc/rpc_soc.c =================================================================== --- projects/clang370-import/lib/libc/rpc/rpc_soc.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/rpc_soc.c (revision 288126) @@ -1,534 +1,534 @@ /* $NetBSD: rpc_soc.c,v 1.6 2000/07/06 03:10:35 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. 
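The "static part" serialized by xdr_callhdr() above is exactly five XDR units. A small sketch, with illustrative program and version numbers, that encodes one header into memory and prints its size; it assumes the usual <rpc/rpc.h> declarations:

#include <stdio.h>
#include <string.h>
#include <rpc/rpc.h>

int
main(void)
{
	char buf[64];
	XDR xdrs;
	struct rpc_msg msg;

	memset(&msg, 0, sizeof(msg));
	msg.rm_xid = 1;
	msg.rm_call.cb_prog = 100000;	/* illustrative program/version */
	msg.rm_call.cb_vers = 2;
	/* xdr_callhdr() sets rm_direction and cb_rpcvers itself. */

	xdrmem_create(&xdrs, buf, sizeof(buf), XDR_ENCODE);
	if (!xdr_callhdr(&xdrs, &msg))
		return (1);
	/* xid + direction + rpcvers + prog + vers = 5 units = 20 bytes. */
	printf("static header: %u bytes\n", xdr_getpos(&xdrs));
	xdr_destroy(&xdrs);
	return (0);
}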
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* #ident "@(#)rpc_soc.c 1.17 94/04/24 SMI" */ /* * Copyright (c) 1986-1991 by Sun Microsystems Inc. * In addition, portions of such source code were derived from Berkeley * 4.3 BSD under license from the Regents of the University of * California. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rpc_soc.c 1.41 89/05/02 Copyr 1988 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); #ifdef PORTMAP /* * rpc_soc.c * * The backward compatibility routines for the earlier implementation * of RPC, where the only transports supported were tcp/ip and udp/ip. * Based on berkeley socket abstraction, now implemented on the top * of TLI/Streams */ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "rpc_com.h" #include "mt_misc.h" static CLIENT *clnt_com_create(struct sockaddr_in *, rpcprog_t, rpcvers_t, int *, u_int, u_int, char *); static SVCXPRT *svc_com_create(int, u_int, u_int, char *); static bool_t rpc_wrap_bcast(char *, struct netbuf *, struct netconfig *); /* XXX */ #define IN4_LOCALHOST_STRING "127.0.0.1" #define IN6_LOCALHOST_STRING "::1" /* * A common clnt create routine */ static CLIENT * clnt_com_create(struct sockaddr_in *raddr, rpcprog_t prog, rpcvers_t vers, int *sockp, u_int sendsz, u_int recvsz, char *tp) { CLIENT *cl; int madefd = FALSE; int fd = *sockp; struct netconfig *nconf; struct netbuf bindaddr; mutex_lock(&rpcsoc_lock); if ((nconf = __rpc_getconfip(tp)) == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNPROTO; mutex_unlock(&rpcsoc_lock); return (NULL); } if (fd == RPC_ANYSOCK) { fd = __rpc_nconf2fd(nconf); if (fd == -1) goto syserror; madefd = TRUE; } if (raddr->sin_port == 0) { u_int proto; u_short sport; mutex_unlock(&rpcsoc_lock); /* pmap_getport is recursive */ proto = strcmp(tp, "udp") == 0 ? 
IPPROTO_UDP : IPPROTO_TCP; sport = pmap_getport(raddr, (u_long)prog, (u_long)vers, proto); if (sport == 0) { goto err; } raddr->sin_port = htons(sport); mutex_lock(&rpcsoc_lock); /* pmap_getport is recursive */ } /* Transform sockaddr_in to netbuf */ bindaddr.maxlen = bindaddr.len = sizeof (struct sockaddr_in); bindaddr.buf = raddr; bindresvport(fd, NULL); cl = clnt_tli_create(fd, nconf, &bindaddr, prog, vers, sendsz, recvsz); if (cl) { if (madefd == TRUE) { /* * The fd should be closed while destroying the handle. */ (void) CLNT_CONTROL(cl, CLSET_FD_CLOSE, NULL); *sockp = fd; } (void) freenetconfigent(nconf); mutex_unlock(&rpcsoc_lock); return (cl); } goto err; syserror: rpc_createerr.cf_stat = RPC_SYSTEMERROR; rpc_createerr.cf_error.re_errno = errno; err: if (madefd == TRUE) (void)_close(fd); (void) freenetconfigent(nconf); mutex_unlock(&rpcsoc_lock); return (NULL); } CLIENT * clntudp_bufcreate(struct sockaddr_in *raddr, u_long prog, u_long vers, struct timeval wait, int *sockp, u_int sendsz, u_int recvsz) { CLIENT *cl; cl = clnt_com_create(raddr, (rpcprog_t)prog, (rpcvers_t)vers, sockp, sendsz, recvsz, "udp"); if (cl == NULL) { return (NULL); } (void) CLNT_CONTROL(cl, CLSET_RETRY_TIMEOUT, &wait); return (cl); } CLIENT * clntudp_create(struct sockaddr_in *raddr, u_long program, u_long version, struct timeval wait, int *sockp) { return clntudp_bufcreate(raddr, program, version, wait, sockp, UDPMSGSIZE, UDPMSGSIZE); } CLIENT * clnttcp_create(struct sockaddr_in *raddr, u_long prog, u_long vers, int *sockp, u_int sendsz, u_int recvsz) { return clnt_com_create(raddr, (rpcprog_t)prog, (rpcvers_t)vers, sockp, sendsz, recvsz, "tcp"); } CLIENT * clntraw_create(u_long prog, u_long vers) { return clnt_raw_create((rpcprog_t)prog, (rpcvers_t)vers); } /* * A common server create routine */ static SVCXPRT * svc_com_create(int fd, u_int sendsize, u_int recvsize, char *netid) { struct netconfig *nconf; SVCXPRT *svc; int madefd = FALSE; int port; struct sockaddr_in sin; if ((nconf = __rpc_getconfip(netid)) == NULL) { (void) syslog(LOG_ERR, "Could not get %s transport", netid); return (NULL); } if (fd == RPC_ANYSOCK) { fd = __rpc_nconf2fd(nconf); if (fd == -1) { (void) freenetconfigent(nconf); (void) syslog(LOG_ERR, "svc%s_create: could not open connection", netid); return (NULL); } madefd = TRUE; } memset(&sin, 0, sizeof sin); sin.sin_family = AF_INET; bindresvport(fd, &sin); _listen(fd, SOMAXCONN); svc = svc_tli_create(fd, nconf, NULL, sendsize, recvsize); (void) freenetconfigent(nconf); if (svc == NULL) { if (madefd) (void)_close(fd); return (NULL); } port = (((struct sockaddr_in *)svc->xp_ltaddr.buf)->sin_port); svc->xp_port = ntohs(port); return (svc); } SVCXPRT * svctcp_create(int fd, u_int sendsize, u_int recvsize) { return svc_com_create(fd, sendsize, recvsize, "tcp"); } SVCXPRT * svcudp_bufcreate(int fd, u_int sendsz, u_int recvsz) { return svc_com_create(fd, sendsz, recvsz, "udp"); } SVCXPRT * svcfd_create(int fd, u_int sendsize, u_int recvsize) { return svc_fd_create(fd, sendsize, recvsize); } SVCXPRT * svcudp_create(int fd) { return svc_com_create(fd, UDPMSGSIZE, UDPMSGSIZE, "udp"); } SVCXPRT * -svcraw_create() +svcraw_create(void) { return svc_raw_create(); } int get_myaddress(struct sockaddr_in *addr) { memset((void *) addr, 0, sizeof(*addr)); addr->sin_family = AF_INET; addr->sin_port = htons(PMAPPORT); addr->sin_addr.s_addr = htonl(INADDR_LOOPBACK); return (0); } /* * For connectionless "udp" transport. Obsoleted by rpc_call(). 
*/ int callrpc(const char *host, int prognum, int versnum, int procnum, xdrproc_t inproc, void *in, xdrproc_t outproc, void *out) { return (int)rpc_call(host, (rpcprog_t)prognum, (rpcvers_t)versnum, (rpcproc_t)procnum, inproc, in, outproc, out, "udp"); } /* * For connectionless kind of transport. Obsoleted by rpc_reg() */ int registerrpc(int prognum, int versnum, int procnum, char *(*progname)(char [UDPMSGSIZE]), xdrproc_t inproc, xdrproc_t outproc) { return rpc_reg((rpcprog_t)prognum, (rpcvers_t)versnum, (rpcproc_t)procnum, progname, inproc, outproc, "udp"); } /* * All the following clnt_broadcast stuff is convulated; it supports * the earlier calling style of the callback function */ static thread_key_t clnt_broadcast_key; static resultproc_t clnt_broadcast_result_main; static once_t clnt_broadcast_once = ONCE_INITIALIZER; static void clnt_broadcast_key_init(void) { thr_keycreate(&clnt_broadcast_key, free); } /* * Need to translate the netbuf address into sockaddr_in address. * Dont care about netid here. */ /* ARGSUSED */ static bool_t rpc_wrap_bcast(char *resultp, struct netbuf *addr, struct netconfig *nconf) /* * char *resultp; // results of the call * struct netbuf *addr; // address of the guy who responded * struct netconfig *nconf; // Netconf of the transport */ { resultproc_t clnt_broadcast_result; if (strcmp(nconf->nc_netid, "udp")) return (FALSE); if (thr_main()) clnt_broadcast_result = clnt_broadcast_result_main; else clnt_broadcast_result = (resultproc_t)thr_getspecific(clnt_broadcast_key); return (*clnt_broadcast_result)(resultp, (struct sockaddr_in *)addr->buf); } /* * Broadcasts on UDP transport. Obsoleted by rpc_broadcast(). */ enum clnt_stat clnt_broadcast(u_long prog, u_long vers, u_long proc, xdrproc_t xargs, void *argsp, xdrproc_t xresults, void *resultsp, resultproc_t eachresult) /* * u_long prog; // program number * u_long vers; // version number * u_long proc; // procedure number * xdrproc_t xargs; // xdr routine for args * void *argsp; // pointer to args * xdrproc_t xresults; // xdr routine for results * void *resultsp; // pointer to results * resultproc_t eachresult; // call with each result obtained */ { if (thr_main()) clnt_broadcast_result_main = eachresult; else { thr_once(&clnt_broadcast_once, clnt_broadcast_key_init); thr_setspecific(clnt_broadcast_key, (void *) eachresult); } return rpc_broadcast((rpcprog_t)prog, (rpcvers_t)vers, (rpcproc_t)proc, xargs, argsp, xresults, resultsp, (resultproc_t) rpc_wrap_bcast, "udp"); } /* * Create the client des authentication object. Obsoleted by * authdes_seccreate(). */ AUTH * authdes_create(char *servername, u_int window, struct sockaddr *syncaddr, des_block *ckey) /* * char *servername; // network name of server * u_int window; // time to live * struct sockaddr *syncaddr; // optional hostaddr to sync with * des_block *ckey; // optional conversation key to use */ { AUTH *dummy; AUTH *nauth; char hostname[NI_MAXHOST]; if (syncaddr) { /* * Change addr to hostname, because that is the way * new interface takes it. */ if (getnameinfo(syncaddr, syncaddr->sa_len, hostname, sizeof hostname, NULL, 0, 0) != 0) goto fallback; nauth = authdes_seccreate(servername, window, hostname, ckey); return (nauth); } fallback: dummy = authdes_seccreate(servername, window, NULL, ckey); return (dummy); } /* * Create a client handle for a unix connection. 
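callrpc() above compresses client creation, the call, and teardown into one step. A hedged example that pings the portmapper's NULL procedure over UDP; it assumes rpcbind (or a portmapper) is running locally and that <rpc/clnt_soc.h> carries the compatibility prototype:

#include <stdio.h>
#include <rpc/rpc.h>
#include <rpc/clnt_soc.h>
#include <rpc/pmap_prot.h>

int
main(void)
{
	int stat;

	/* One-shot NULL-procedure ping of the local portmapper over UDP. */
	stat = callrpc("localhost", PMAPPROG, PMAPVERS, PMAPPROC_NULL,
	    (xdrproc_t)xdr_void, NULL, (xdrproc_t)xdr_void, NULL);
	if (stat != RPC_SUCCESS) {
		clnt_perrno((enum clnt_stat)stat);
		return (1);
	}
	printf("portmapper answered\n");
	return (0);
}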
Obsoleted by clnt_vc_create() */ CLIENT * clntunix_create(struct sockaddr_un *raddr, u_long prog, u_long vers, int *sockp, u_int sendsz, u_int recvsz) { struct netbuf *svcaddr; CLIENT *cl; int len; cl = NULL; svcaddr = NULL; if ((raddr->sun_len == 0) || ((svcaddr = malloc(sizeof(struct netbuf))) == NULL ) || ((svcaddr->buf = malloc(sizeof(struct sockaddr_un))) == NULL)) { if (svcaddr != NULL) free(svcaddr); rpc_createerr.cf_stat = RPC_SYSTEMERROR; rpc_createerr.cf_error.re_errno = errno; return(cl); } if (*sockp < 0) { *sockp = _socket(AF_LOCAL, SOCK_STREAM, 0); len = raddr->sun_len = SUN_LEN(raddr); if ((*sockp < 0) || (_connect(*sockp, (struct sockaddr *)raddr, len) < 0)) { rpc_createerr.cf_stat = RPC_SYSTEMERROR; rpc_createerr.cf_error.re_errno = errno; if (*sockp != -1) (void)_close(*sockp); goto done; } } svcaddr->buf = raddr; svcaddr->len = raddr->sun_len; svcaddr->maxlen = sizeof (struct sockaddr_un); cl = clnt_vc_create(*sockp, svcaddr, prog, vers, sendsz, recvsz); done: free(svcaddr->buf); free(svcaddr); return(cl); } /* * Creates, registers, and returns a (rpc) unix based transporter. * Obsoleted by svc_vc_create(). */ SVCXPRT * svcunix_create(int sock, u_int sendsize, u_int recvsize, char *path) { struct netconfig *nconf; void *localhandle; struct sockaddr_un sun; struct sockaddr *sa; struct t_bind taddr; SVCXPRT *xprt; int addrlen; xprt = (SVCXPRT *)NULL; localhandle = setnetconfig(); while ((nconf = getnetconfig(localhandle)) != NULL) { if (nconf->nc_protofmly != NULL && strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0) break; } if (nconf == NULL) return(xprt); if ((sock = __rpc_nconf2fd(nconf)) < 0) goto done; memset(&sun, 0, sizeof sun); sun.sun_family = AF_LOCAL; if (strlcpy(sun.sun_path, path, sizeof(sun.sun_path)) >= sizeof(sun.sun_path)) goto done; sun.sun_len = SUN_LEN(&sun); addrlen = sizeof (struct sockaddr_un); sa = (struct sockaddr *)&sun; if (_bind(sock, sa, addrlen) < 0) goto done; taddr.addr.len = taddr.addr.maxlen = addrlen; taddr.addr.buf = malloc(addrlen); if (taddr.addr.buf == NULL) goto done; memcpy(taddr.addr.buf, sa, addrlen); if (nconf->nc_semantics != NC_TPI_CLTS) { if (_listen(sock, SOMAXCONN) < 0) { free(taddr.addr.buf); goto done; } } xprt = (SVCXPRT *)svc_tli_create(sock, nconf, &taddr, sendsize, recvsize); done: endnetconfig(localhandle); return(xprt); } /* * Like svunix_create(), except the routine takes any *open* UNIX file * descriptor as its first input. Obsoleted by svc_fd_create(); */ SVCXPRT * svcunixfd_create(int fd, u_int sendsize, u_int recvsize) { return (svc_fd_create(fd, sendsize, recvsize)); } #endif /* PORTMAP */ Index: projects/clang370-import/lib/libc/rpc/rpcb_clnt.c =================================================================== --- projects/clang370-import/lib/libc/rpc/rpcb_clnt.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/rpcb_clnt.c (revision 288126) @@ -1,1312 +1,1311 @@ /* $NetBSD: rpcb_clnt.c,v 1.6 2000/07/16 06:41:43 itojun Exp $ */ /*- * Copyright (c) 2010, Oracle America, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* - Neither the name of the "Oracle America, Inc." nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* #ident "@(#)rpcb_clnt.c 1.27 94/04/24 SMI" */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rpcb_clnt.c 1.30 89/06/21 Copyr 1988 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); /* * rpcb_clnt.c * interface to rpcbind rpc service. */ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include #include #include #include #ifdef PORTMAP #include /* FOR IPPROTO_TCP/UDP definitions */ #include #endif /* PORTMAP */ #include #include #include #include #include #include #include #include "un-namespace.h" #include "rpc_com.h" #include "mt_misc.h" static struct timeval tottimeout = { 60, 0 }; static const struct timeval rmttimeout = { 3, 0 }; static struct timeval rpcbrmttime = { 15, 0 }; extern bool_t xdr_wrapstring(XDR *, char **); static const char nullstring[] = "\000"; #define CACHESIZE 6 struct address_cache { char *ac_host; char *ac_netid; char *ac_uaddr; struct netbuf *ac_taddr; struct address_cache *ac_next; }; static struct address_cache *front; static int cachesize; #define CLCR_GET_RPCB_TIMEOUT 1 #define CLCR_SET_RPCB_TIMEOUT 2 extern int __rpc_lowvers; static struct address_cache *check_cache(const char *, const char *); static void delete_cache(struct netbuf *); static void add_cache(const char *, const char *, struct netbuf *, char *); static CLIENT *getclnthandle(const char *, const struct netconfig *, char **); static CLIENT *local_rpcb(void); static struct netbuf *got_entry(rpcb_entry_list_ptr, const struct netconfig *); /* * This routine adjusts the timeout used for calls to the remote rpcbind. * Also, this routine can be used to set the use of portmapper version 2 * only when doing rpc_broadcasts * These are private routines that may not be provided in future releases. */ bool_t __rpc_control(int request, void *info) { switch (request) { case CLCR_GET_RPCB_TIMEOUT: *(struct timeval *)info = tottimeout; break; case CLCR_SET_RPCB_TIMEOUT: tottimeout = *(struct timeval *)info; break; case CLCR_SET_LOWVERS: __rpc_lowvers = *(int *)info; break; case CLCR_GET_LOWVERS: *(int *)info = __rpc_lowvers; break; default: return (FALSE); } return (TRUE); } /* * It might seem that a reader/writer lock would be more reasonable here. * However because getclnthandle(), the only user of the cache functions, * may do a delete_cache() operation if a check_cache() fails to return an * address useful to clnt_tli_create(), we may as well use a mutex. 
*/ /* * As it turns out, if the cache lock is *not* a reader/writer lock, we will * block all clnt_create's if we are trying to connect to a host that's down, * since the lock will be held all during that time. */ /* * The routines check_cache(), add_cache(), delete_cache() manage the * cache of rpcbind addresses for (host, netid). */ static struct address_cache * check_cache(const char *host, const char *netid) { struct address_cache *cptr; /* READ LOCK HELD ON ENTRY: rpcbaddr_cache_lock */ for (cptr = front; cptr != NULL; cptr = cptr->ac_next) { if (!strcmp(cptr->ac_host, host) && !strcmp(cptr->ac_netid, netid)) { #ifdef ND_DEBUG fprintf(stderr, "Found cache entry for %s: %s\n", host, netid); #endif return (cptr); } } return ((struct address_cache *) NULL); } static void delete_cache(struct netbuf *addr) { struct address_cache *cptr, *prevptr = NULL; /* WRITE LOCK HELD ON ENTRY: rpcbaddr_cache_lock */ for (cptr = front; cptr != NULL; cptr = cptr->ac_next) { if (!memcmp(cptr->ac_taddr->buf, addr->buf, addr->len)) { free(cptr->ac_host); free(cptr->ac_netid); free(cptr->ac_taddr->buf); free(cptr->ac_taddr); if (cptr->ac_uaddr) free(cptr->ac_uaddr); if (prevptr) prevptr->ac_next = cptr->ac_next; else front = cptr->ac_next; free(cptr); cachesize--; break; } prevptr = cptr; } } static void -add_cache(host, netid, taddr, uaddr) - const char *host, *netid; - char *uaddr; - struct netbuf *taddr; +add_cache(const char *host, const char *netid, struct netbuf *taddr, + char *uaddr) { struct address_cache *ad_cache, *cptr, *prevptr; ad_cache = (struct address_cache *) malloc(sizeof (struct address_cache)); if (!ad_cache) { return; } ad_cache->ac_host = strdup(host); ad_cache->ac_netid = strdup(netid); ad_cache->ac_uaddr = uaddr ? strdup(uaddr) : NULL; ad_cache->ac_taddr = (struct netbuf *)malloc(sizeof (struct netbuf)); if (!ad_cache->ac_host || !ad_cache->ac_netid || !ad_cache->ac_taddr || (uaddr && !ad_cache->ac_uaddr)) { goto out; } ad_cache->ac_taddr->len = ad_cache->ac_taddr->maxlen = taddr->len; ad_cache->ac_taddr->buf = (char *) malloc(taddr->len); if (ad_cache->ac_taddr->buf == NULL) { out: if (ad_cache->ac_host) free(ad_cache->ac_host); if (ad_cache->ac_netid) free(ad_cache->ac_netid); if (ad_cache->ac_uaddr) free(ad_cache->ac_uaddr); if (ad_cache->ac_taddr) free(ad_cache->ac_taddr); free(ad_cache); return; } memcpy(ad_cache->ac_taddr->buf, taddr->buf, taddr->len); #ifdef ND_DEBUG fprintf(stderr, "Added to cache: %s : %s\n", host, netid); #endif /* VARIABLES PROTECTED BY rpcbaddr_cache_lock: cptr */ rwlock_wrlock(&rpcbaddr_cache_lock); if (cachesize < CACHESIZE) { ad_cache->ac_next = front; front = ad_cache; cachesize++; } else { /* Free the last entry */ cptr = front; prevptr = NULL; while (cptr->ac_next) { prevptr = cptr; cptr = cptr->ac_next; } #ifdef ND_DEBUG fprintf(stderr, "Deleted from cache: %s : %s\n", cptr->ac_host, cptr->ac_netid); #endif free(cptr->ac_host); free(cptr->ac_netid); free(cptr->ac_taddr->buf); free(cptr->ac_taddr); if (cptr->ac_uaddr) free(cptr->ac_uaddr); if (prevptr) { prevptr->ac_next = NULL; ad_cache->ac_next = front; front = ad_cache; } else { front = ad_cache; ad_cache->ac_next = NULL; } free(cptr); } rwlock_unlock(&rpcbaddr_cache_lock); } /* * This routine will return a client handle that is connected to the * rpcbind. If targaddr is non-NULL, the "universal address" of the * host will be stored in *targaddr; the caller is responsible for * freeing this string. * On error, returns NULL and free's everything. 
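add_cache() above implements a small fixed-capacity, front-insertion cache: new entries are pushed on the front and, once CACHESIZE entries exist, the tail entry is unlinked and freed. Here is that policy in isolation, reduced to a string key; this is a sketch of the eviction logic, not the libc data structure.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CACHESIZE 6

struct entry {
	char *key;
	struct entry *next;
};

static struct entry *front;
static int cachesize;

static void
cache_add(const char *key)
{
	struct entry *e, *cp, *prev;

	if ((e = malloc(sizeof(*e))) == NULL)
		return;
	if ((e->key = strdup(key)) == NULL) {
		free(e);
		return;
	}
	if (cachesize < CACHESIZE) {
		e->next = front;
		front = e;
		cachesize++;
		return;
	}
	/* Full: walk to the tail entry and evict it. */
	prev = NULL;
	for (cp = front; cp->next != NULL; cp = cp->next)
		prev = cp;
	if (prev != NULL)
		prev->next = NULL;
	else
		front = NULL;
	free(cp->key);
	free(cp);
	e->next = front;
	front = e;
}

int
main(void)
{
	const char *hosts[] = { "a", "b", "c", "d", "e", "f", "g", "h" };
	struct entry *cp;
	size_t i;

	for (i = 0; i < sizeof(hosts) / sizeof(hosts[0]); i++)
		cache_add(hosts[i]);
	for (cp = front; cp != NULL; cp = cp->next)
		printf("%s ", cp->key);	/* h g f e d c; a and b evicted */
	printf("\n");
	return (0);
}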
*/ static CLIENT * getclnthandle(const char *host, const struct netconfig *nconf, char **targaddr) { CLIENT *client; struct netbuf *addr, taddr; struct netbuf addr_to_delete; struct __rpc_sockinfo si; struct addrinfo hints, *res, *tres; struct address_cache *ad_cache; char *tmpaddr; /* VARIABLES PROTECTED BY rpcbaddr_cache_lock: ad_cache */ /* Get the address of the rpcbind. Check cache first */ client = NULL; addr_to_delete.len = 0; rwlock_rdlock(&rpcbaddr_cache_lock); ad_cache = NULL; if (host != NULL) ad_cache = check_cache(host, nconf->nc_netid); if (ad_cache != NULL) { addr = ad_cache->ac_taddr; client = clnt_tli_create(RPC_ANYFD, nconf, addr, (rpcprog_t)RPCBPROG, (rpcvers_t)RPCBVERS4, 0, 0); if (client != NULL) { if (targaddr) *targaddr = strdup(ad_cache->ac_uaddr); rwlock_unlock(&rpcbaddr_cache_lock); return (client); } addr_to_delete.len = addr->len; addr_to_delete.buf = (char *)malloc(addr->len); if (addr_to_delete.buf == NULL) { addr_to_delete.len = 0; } else { memcpy(addr_to_delete.buf, addr->buf, addr->len); } } rwlock_unlock(&rpcbaddr_cache_lock); if (addr_to_delete.len != 0) { /* * Assume this may be due to cache data being * outdated */ rwlock_wrlock(&rpcbaddr_cache_lock); delete_cache(&addr_to_delete); rwlock_unlock(&rpcbaddr_cache_lock); free(addr_to_delete.buf); } if (!__rpc_nconf2sockinfo(nconf, &si)) { rpc_createerr.cf_stat = RPC_UNKNOWNPROTO; return NULL; } memset(&hints, 0, sizeof hints); hints.ai_family = si.si_af; hints.ai_socktype = si.si_socktype; hints.ai_protocol = si.si_proto; #ifdef CLNT_DEBUG printf("trying netid %s family %d proto %d socktype %d\n", nconf->nc_netid, si.si_af, si.si_proto, si.si_socktype); #endif if (nconf->nc_protofmly != NULL && strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0) { client = local_rpcb(); if (! client) { #ifdef ND_DEBUG clnt_pcreateerror("rpcbind clnt interface"); #endif return (NULL); } else { struct sockaddr_un sun; if (targaddr) { *targaddr = malloc(sizeof(sun.sun_path)); if (*targaddr == NULL) { CLNT_DESTROY(client); return (NULL); } strncpy(*targaddr, _PATH_RPCBINDSOCK, sizeof(sun.sun_path)); } return (client); } } else { if (getaddrinfo(host, "sunrpc", &hints, &res) != 0) { rpc_createerr.cf_stat = RPC_UNKNOWNHOST; return NULL; } } for (tres = res; tres != NULL; tres = tres->ai_next) { taddr.buf = tres->ai_addr; taddr.len = taddr.maxlen = tres->ai_addrlen; #ifdef ND_DEBUG { char *ua; ua = taddr2uaddr(nconf, &taddr); fprintf(stderr, "Got it [%s]\n", ua); free(ua); } #endif #ifdef ND_DEBUG { int i; fprintf(stderr, "\tnetbuf len = %d, maxlen = %d\n", taddr.len, taddr.maxlen); fprintf(stderr, "\tAddress is "); for (i = 0; i < taddr.len; i++) fprintf(stderr, "%u.", ((char *)(taddr.buf))[i]); fprintf(stderr, "\n"); } #endif client = clnt_tli_create(RPC_ANYFD, nconf, &taddr, (rpcprog_t)RPCBPROG, (rpcvers_t)RPCBVERS4, 0, 0); #ifdef ND_DEBUG if (! client) { clnt_pcreateerror("rpcbind clnt interface"); } #endif if (client) { tmpaddr = targaddr ? taddr2uaddr(nconf, &taddr) : NULL; add_cache(host, nconf->nc_netid, &taddr, tmpaddr); if (targaddr) *targaddr = tmpaddr; break; } } if (res) freeaddrinfo(res); return (client); } /* XXX */ #define IN4_LOCALHOST_STRING "127.0.0.1" #define IN6_LOCALHOST_STRING "::1" /* * This routine will return a client handle that is connected to the local * rpcbind. Returns NULL on error and free's everything. 
*/ static CLIENT * -local_rpcb() +local_rpcb(void) { CLIENT *client; static struct netconfig *loopnconf; static char *hostname; int sock; size_t tsize; struct netbuf nbuf; struct sockaddr_un sun; /* * Try connecting to the local rpcbind through a local socket * first. If this doesn't work, try all transports defined in * the netconfig file. */ memset(&sun, 0, sizeof sun); sock = _socket(AF_LOCAL, SOCK_STREAM, 0); if (sock < 0) goto try_nconf; sun.sun_family = AF_LOCAL; strcpy(sun.sun_path, _PATH_RPCBINDSOCK); nbuf.len = sun.sun_len = SUN_LEN(&sun); nbuf.maxlen = sizeof (struct sockaddr_un); nbuf.buf = &sun; tsize = __rpc_get_t_size(AF_LOCAL, 0, 0); client = clnt_vc_create(sock, &nbuf, (rpcprog_t)RPCBPROG, (rpcvers_t)RPCBVERS, tsize, tsize); if (client != NULL) { /* Mark the socket to be closed in destructor */ (void) CLNT_CONTROL(client, CLSET_FD_CLOSE, NULL); return client; } /* Nobody needs this socket anymore; free the descriptor. */ _close(sock); try_nconf: /* VARIABLES PROTECTED BY loopnconf_lock: loopnconf */ mutex_lock(&loopnconf_lock); if (loopnconf == NULL) { struct netconfig *nconf, *tmpnconf = NULL; void *nc_handle; int fd; nc_handle = setnetconfig(); if (nc_handle == NULL) { /* fails to open netconfig file */ syslog (LOG_ERR, "rpc: failed to open " NETCONFIG); rpc_createerr.cf_stat = RPC_UNKNOWNPROTO; mutex_unlock(&loopnconf_lock); return (NULL); } while ((nconf = getnetconfig(nc_handle)) != NULL) { #ifdef INET6 if ((strcmp(nconf->nc_protofmly, NC_INET6) == 0 || #else if (( #endif strcmp(nconf->nc_protofmly, NC_INET) == 0) && (nconf->nc_semantics == NC_TPI_COTS || nconf->nc_semantics == NC_TPI_COTS_ORD)) { fd = __rpc_nconf2fd(nconf); /* * Can't create a socket, assume that * this family isn't configured in the kernel. */ if (fd < 0) continue; _close(fd); tmpnconf = nconf; if (!strcmp(nconf->nc_protofmly, NC_INET)) hostname = IN4_LOCALHOST_STRING; else hostname = IN6_LOCALHOST_STRING; } } if (tmpnconf == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNPROTO; mutex_unlock(&loopnconf_lock); return (NULL); } loopnconf = getnetconfigent(tmpnconf->nc_netid); /* loopnconf is never freed */ endnetconfig(nc_handle); } mutex_unlock(&loopnconf_lock); client = getclnthandle(hostname, loopnconf, NULL); return (client); } /* * Set a mapping between program, version and address. * Calls the rpcbind service to do the mapping. * * nconf - Network structure of transport * address - Services netconfig address */ bool_t rpcb_set(rpcprog_t program, rpcvers_t version, const struct netconfig *nconf, const struct netbuf *address) { CLIENT *client; bool_t rslt = FALSE; RPCB parms; char uidbuf[32]; /* parameter checking */ if (nconf == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNPROTO; return (FALSE); } if (address == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNADDR; return (FALSE); } client = local_rpcb(); if (! client) { return (FALSE); } /* convert to universal */ /*LINTED const castaway*/ parms.r_addr = taddr2uaddr((struct netconfig *) nconf, (struct netbuf *)address); if (!parms.r_addr) { CLNT_DESTROY(client); rpc_createerr.cf_stat = RPC_N2AXLATEFAILURE; return (FALSE); /* no universal address */ } parms.r_prog = program; parms.r_vers = version; parms.r_netid = nconf->nc_netid; /* * Though uid is not being used directly, we still send it for * completeness. For non-unix platforms, perhaps some other * string or an empty string can be sent. 
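 * Here the string is simply the decimal effective uid, so a server
 * running as root sends r_owner "0".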
*/ (void) snprintf(uidbuf, sizeof uidbuf, "%d", geteuid()); parms.r_owner = uidbuf; CLNT_CALL(client, (rpcproc_t)RPCBPROC_SET, (xdrproc_t) xdr_rpcb, (char *)(void *)&parms, (xdrproc_t) xdr_bool, (char *)(void *)&rslt, tottimeout); CLNT_DESTROY(client); free(parms.r_addr); return (rslt); } /* * Remove the mapping between program, version and netbuf address. * Calls the rpcbind service to do the un-mapping. * If netbuf is NULL, unset for all the transports, otherwise unset * only for the given transport. */ bool_t rpcb_unset(rpcprog_t program, rpcvers_t version, const struct netconfig *nconf) { CLIENT *client; bool_t rslt = FALSE; RPCB parms; char uidbuf[32]; client = local_rpcb(); if (! client) { return (FALSE); } parms.r_prog = program; parms.r_vers = version; if (nconf) parms.r_netid = nconf->nc_netid; else { /*LINTED const castaway*/ parms.r_netid = (char *) &nullstring[0]; /* unsets all */ } /*LINTED const castaway*/ parms.r_addr = (char *) &nullstring[0]; (void) snprintf(uidbuf, sizeof uidbuf, "%d", geteuid()); parms.r_owner = uidbuf; CLNT_CALL(client, (rpcproc_t)RPCBPROC_UNSET, (xdrproc_t) xdr_rpcb, (char *)(void *)&parms, (xdrproc_t) xdr_bool, (char *)(void *)&rslt, tottimeout); CLNT_DESTROY(client); return (rslt); } /* * From the merged list, find the appropriate entry */ static struct netbuf * got_entry(rpcb_entry_list_ptr relp, const struct netconfig *nconf) { struct netbuf *na = NULL; rpcb_entry_list_ptr sp; rpcb_entry *rmap; for (sp = relp; sp != NULL; sp = sp->rpcb_entry_next) { rmap = &sp->rpcb_entry_map; if ((strcmp(nconf->nc_proto, rmap->r_nc_proto) == 0) && (strcmp(nconf->nc_protofmly, rmap->r_nc_protofmly) == 0) && (nconf->nc_semantics == rmap->r_nc_semantics) && (rmap->r_maddr != NULL) && (rmap->r_maddr[0] != 0)) { na = uaddr2taddr(nconf, rmap->r_maddr); #ifdef ND_DEBUG fprintf(stderr, "\tRemote address is [%s].\n", rmap->r_maddr); if (!na) fprintf(stderr, "\tCouldn't resolve remote address!\n"); #endif break; } } return (na); } /* * Quick check to see if rpcbind is up. Tries to connect over * local transport. */ static bool_t __rpcbind_is_up(void) { struct netconfig *nconf; struct sockaddr_un sun; void *localhandle; int sock; nconf = NULL; localhandle = setnetconfig(); while ((nconf = getnetconfig(localhandle)) != NULL) { if (nconf->nc_protofmly != NULL && strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0) break; } if (nconf == NULL) return (FALSE); endnetconfig(localhandle); memset(&sun, 0, sizeof sun); sock = _socket(AF_LOCAL, SOCK_STREAM, 0); if (sock < 0) return (FALSE); sun.sun_family = AF_LOCAL; strncpy(sun.sun_path, _PATH_RPCBINDSOCK, sizeof(sun.sun_path)); sun.sun_len = SUN_LEN(&sun); if (_connect(sock, (struct sockaddr *)&sun, sun.sun_len) < 0) { _close(sock); return (FALSE); } _close(sock); return (TRUE); } /* * An internal function which optimizes rpcb_getaddr function. It also * returns the client handle that it uses to contact the remote rpcbind. * * The algorithm used: If the transports is TCP or UDP, it first tries * version 2 (portmap), 4 and then 3 (svr4). This order should be * changed in the next OS release to 4, 2 and 3. We are assuming that by * that time, version 4 would be available on many machines on the network. * With this algorithm, we get performance as well as a plan for * obsoleting version 2. * * For all other transports, the algorithm remains as 4 and then 3. * * XXX: Due to some problems with t_connect(), we do not reuse the same client * handle for COTS cases and hence in these cases we do not return the * client handle. 
This code will change if t_connect() ever * starts working properly. Also look under clnt_vc.c. */ struct netbuf * __rpcb_findaddr_timed(rpcprog_t program, rpcvers_t version, const struct netconfig *nconf, const char *host, CLIENT **clpp, struct timeval *tp) { static bool_t check_rpcbind = TRUE; CLIENT *client = NULL; RPCB parms; enum clnt_stat clnt_st; char *ua = NULL; rpcvers_t vers; struct netbuf *address = NULL; rpcvers_t start_vers = RPCBVERS4; struct netbuf servaddr; /* parameter checking */ if (nconf == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNPROTO; return (NULL); } parms.r_addr = NULL; /* * Use default total timeout if no timeout is specified. */ if (tp == NULL) tp = &tottimeout; #ifdef PORTMAP /* Try version 2 for TCP or UDP */ if (strcmp(nconf->nc_protofmly, NC_INET) == 0) { u_short port = 0; struct netbuf remote; rpcvers_t pmapvers = 2; struct pmap pmapparms; /* * Try UDP only - there are some portmappers out * there that use UDP only. */ if (strcmp(nconf->nc_proto, NC_TCP) == 0) { struct netconfig *newnconf; if ((newnconf = getnetconfigent("udp")) == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNPROTO; return (NULL); } client = getclnthandle(host, newnconf, &parms.r_addr); freenetconfigent(newnconf); } else { client = getclnthandle(host, nconf, &parms.r_addr); } if (client == NULL) return (NULL); /* * Set version and retry timeout. */ CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, (char *)&rpcbrmttime); CLNT_CONTROL(client, CLSET_VERS, (char *)&pmapvers); pmapparms.pm_prog = program; pmapparms.pm_vers = version; pmapparms.pm_prot = strcmp(nconf->nc_proto, NC_TCP) ? IPPROTO_UDP : IPPROTO_TCP; pmapparms.pm_port = 0; /* not needed */ clnt_st = CLNT_CALL(client, (rpcproc_t)PMAPPROC_GETPORT, (xdrproc_t) xdr_pmap, (caddr_t)(void *)&pmapparms, (xdrproc_t) xdr_u_short, (caddr_t)(void *)&port, *tp); if (clnt_st != RPC_SUCCESS) { if ((clnt_st == RPC_PROGVERSMISMATCH) || (clnt_st == RPC_PROGUNAVAIL)) goto try_rpcbind; /* Try different versions */ rpc_createerr.cf_stat = RPC_PMAPFAILURE; clnt_geterr(client, &rpc_createerr.cf_error); goto error; } else if (port == 0) { address = NULL; rpc_createerr.cf_stat = RPC_PROGNOTREGISTERED; goto error; } port = htons(port); CLNT_CONTROL(client, CLGET_SVC_ADDR, (char *)&remote); if (((address = (struct netbuf *) malloc(sizeof (struct netbuf))) == NULL) || ((address->buf = (char *) malloc(remote.len)) == NULL)) { rpc_createerr.cf_stat = RPC_SYSTEMERROR; clnt_geterr(client, &rpc_createerr.cf_error); if (address) { free(address); address = NULL; } goto error; } memcpy(address->buf, remote.buf, remote.len); memcpy(&((char *)address->buf)[sizeof (short)], (char *)(void *)&port, sizeof (short)); address->len = address->maxlen = remote.len; goto done; } #endif /* PORTMAP */ try_rpcbind: /* * Check if rpcbind is up. This prevents needless delays when * accessing applications such as the keyserver while booting * disklessly. */ if (check_rpcbind && strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0) { if (!__rpcbind_is_up()) { rpc_createerr.cf_stat = RPC_PMAPFAILURE; rpc_createerr.cf_error.re_errno = 0; goto error; } check_rpcbind = FALSE; } /* * Now we try version 4 and then 3. 
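 * (start_vers begins at RPCBVERS4; the regular_rpcbind loop below
 * retries each version from start_vers down to RPCBVERS, and a
 * version mismatch drops start_vers to RPCBVERS first.)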
* We also send the remote system the address we used to * contact it in case it can help to connect back with us */ parms.r_prog = program; parms.r_vers = version; /*LINTED const castaway*/ parms.r_owner = (char *) &nullstring[0]; /* not needed; */ /* just for xdring */ parms.r_netid = nconf->nc_netid; /* not really needed */ /* * If a COTS transport is being used, try getting address via CLTS * transport. This works only with version 4. */ if (nconf->nc_semantics == NC_TPI_COTS_ORD || nconf->nc_semantics == NC_TPI_COTS) { void *handle; struct netconfig *nconf_clts; rpcb_entry_list_ptr relp = NULL; if (client == NULL) { /* This did not go through the above PORTMAP/TCP code */ if ((handle = __rpc_setconf("datagram_v")) != NULL) { while ((nconf_clts = __rpc_getconf(handle)) != NULL) { if (strcmp(nconf_clts->nc_protofmly, nconf->nc_protofmly) != 0) { continue; } client = getclnthandle(host, nconf_clts, &parms.r_addr); break; } __rpc_endconf(handle); } if (client == NULL) goto regular_rpcbind; /* Go the regular way */ } else { /* This is a UDP PORTMAP handle. Change to version 4 */ vers = RPCBVERS4; CLNT_CONTROL(client, CLSET_VERS, (char *)(void *)&vers); } /* * We also send the remote system the address we used to * contact it in case it can help it connect back with us */ if (parms.r_addr == NULL) { /*LINTED const castaway*/ parms.r_addr = (char *) &nullstring[0]; /* for XDRing */ } CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, (char *)&rpcbrmttime); clnt_st = CLNT_CALL(client, (rpcproc_t)RPCBPROC_GETADDRLIST, (xdrproc_t) xdr_rpcb, (char *)(void *)&parms, (xdrproc_t) xdr_rpcb_entry_list_ptr, (char *)(void *)&relp, *tp); if (clnt_st == RPC_SUCCESS) { if ((address = got_entry(relp, nconf)) != NULL) { xdr_free((xdrproc_t) xdr_rpcb_entry_list_ptr, (char *)(void *)&relp); CLNT_CONTROL(client, CLGET_SVC_ADDR, (char *)(void *)&servaddr); __rpc_fixup_addr(address, &servaddr); goto done; } /* Entry not found for this transport */ xdr_free((xdrproc_t) xdr_rpcb_entry_list_ptr, (char *)(void *)&relp); /* * XXX: should have perhaps returned with error but * since the remote machine might not always be able * to send the address on all transports, we try the * regular way with regular_rpcbind */ goto regular_rpcbind; } else if ((clnt_st == RPC_PROGVERSMISMATCH) || (clnt_st == RPC_PROGUNAVAIL)) { start_vers = RPCBVERS; /* Try version 3 now */ goto regular_rpcbind; /* Try different versions */ } else { rpc_createerr.cf_stat = RPC_PMAPFAILURE; clnt_geterr(client, &rpc_createerr.cf_error); goto error; } } regular_rpcbind: /* Now the same transport is to be used to get the address */ if (client && ((nconf->nc_semantics == NC_TPI_COTS_ORD) || (nconf->nc_semantics == NC_TPI_COTS))) { /* A CLTS type of client - destroy it */ CLNT_DESTROY(client); client = NULL; } if (client == NULL) { client = getclnthandle(host, nconf, &parms.r_addr); if (client == NULL) { goto error; } } if (parms.r_addr == NULL) { /*LINTED const castaway*/ parms.r_addr = (char *) &nullstring[0]; } /* First try from start_vers and then version 3 (RPCBVERS) */ CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, (char *) &rpcbrmttime); for (vers = start_vers; vers >= RPCBVERS; vers--) { /* Set the version */ CLNT_CONTROL(client, CLSET_VERS, (char *)(void *)&vers); clnt_st = CLNT_CALL(client, (rpcproc_t)RPCBPROC_GETADDR, (xdrproc_t) xdr_rpcb, (char *)(void *)&parms, (xdrproc_t) xdr_wrapstring, (char *)(void *) &ua, *tp); if (clnt_st == RPC_SUCCESS) { if ((ua == NULL) || (ua[0] == 0)) { /* address unknown */ rpc_createerr.cf_stat = RPC_PROGNOTREGISTERED; goto 
error; } address = uaddr2taddr(nconf, ua); #ifdef ND_DEBUG fprintf(stderr, "\tRemote address is [%s]\n", ua); if (!address) fprintf(stderr, "\tCouldn't resolve remote address!\n"); #endif xdr_free((xdrproc_t)xdr_wrapstring, (char *)(void *)&ua); if (! address) { /* We don't know about your universal address */ rpc_createerr.cf_stat = RPC_N2AXLATEFAILURE; goto error; } CLNT_CONTROL(client, CLGET_SVC_ADDR, (char *)(void *)&servaddr); __rpc_fixup_addr(address, &servaddr); goto done; } else if (clnt_st == RPC_PROGVERSMISMATCH) { struct rpc_err rpcerr; clnt_geterr(client, &rpcerr); if (rpcerr.re_vers.low > RPCBVERS4) goto error; /* a new version, can't handle */ } else if (clnt_st != RPC_PROGUNAVAIL) { /* Cant handle this error */ rpc_createerr.cf_stat = clnt_st; clnt_geterr(client, &rpc_createerr.cf_error); goto error; } } error: if (client) { CLNT_DESTROY(client); client = NULL; } done: if (nconf->nc_semantics != NC_TPI_CLTS) { /* This client is the connectionless one */ if (client) { CLNT_DESTROY(client); client = NULL; } } if (clpp) { *clpp = client; } else if (client) { CLNT_DESTROY(client); } if (parms.r_addr != NULL && parms.r_addr != nullstring) free(parms.r_addr); return (address); } /* * Find the mapped address for program, version. * Calls the rpcbind service remotely to do the lookup. * Uses the transport specified in nconf. * Returns FALSE (0) if no map exists, else returns 1. * * Assuming that the address is all properly allocated */ int rpcb_getaddr(rpcprog_t program, rpcvers_t version, const struct netconfig *nconf, struct netbuf *address, const char *host) { struct netbuf *na; if ((na = __rpcb_findaddr_timed(program, version, (struct netconfig *) nconf, (char *) host, (CLIENT **) NULL, (struct timeval *) NULL)) == NULL) return (FALSE); if (na->len > address->maxlen) { /* Too long address */ free(na->buf); free(na); rpc_createerr.cf_stat = RPC_FAILED; return (FALSE); } memcpy(address->buf, na->buf, (size_t)na->len); address->len = na->len; free(na->buf); free(na); return (TRUE); } /* * Get a copy of the current maps. * Calls the rpcbind service remotely to get the maps. * * It returns only a list of the services * It returns NULL on failure. */ rpcblist * rpcb_getmaps(const struct netconfig *nconf, const char *host) { rpcblist_ptr head = NULL; CLIENT *client; enum clnt_stat clnt_st; rpcvers_t vers = 0; client = getclnthandle(host, nconf, NULL); if (client == NULL) { return (head); } clnt_st = CLNT_CALL(client, (rpcproc_t)RPCBPROC_DUMP, (xdrproc_t) xdr_void, NULL, (xdrproc_t) xdr_rpcblist_ptr, (char *)(void *)&head, tottimeout); if (clnt_st == RPC_SUCCESS) goto done; if ((clnt_st != RPC_PROGVERSMISMATCH) && (clnt_st != RPC_PROGUNAVAIL)) { rpc_createerr.cf_stat = RPC_RPCBFAILURE; clnt_geterr(client, &rpc_createerr.cf_error); goto done; } /* fall back to earlier version */ CLNT_CONTROL(client, CLGET_VERS, (char *)(void *)&vers); if (vers == RPCBVERS4) { vers = RPCBVERS; CLNT_CONTROL(client, CLSET_VERS, (char *)(void *)&vers); if (CLNT_CALL(client, (rpcproc_t)RPCBPROC_DUMP, (xdrproc_t) xdr_void, NULL, (xdrproc_t) xdr_rpcblist_ptr, (char *)(void *)&head, tottimeout) == RPC_SUCCESS) goto done; } rpc_createerr.cf_stat = RPC_RPCBFAILURE; clnt_geterr(client, &rpc_createerr.cf_error); done: CLNT_DESTROY(client); return (head); } /* * rpcbinder remote-call-service interface. * This routine is used to call the rpcbind remote call service * which will look up a service program in the address maps, and then * remotely call that routine with the given parameters. 
This allows * programs to do a lookup and call in one step. -*/ + * + * nconf - Netconfig structure + * host - Remote host name + * proc - Remote proc identifiers + * xdrargs, xdrres - XDR routines + * argsp, resp - Argument and Result + * tout - Timeout value for this call + * addr_ptr - Preallocated netbuf address + */ enum clnt_stat -rpcb_rmtcall(nconf, host, prog, vers, proc, xdrargs, argsp, - xdrres, resp, tout, addr_ptr) - const struct netconfig *nconf; /* Netconfig structure */ - const char *host; /* Remote host name */ - rpcprog_t prog; - rpcvers_t vers; - rpcproc_t proc; /* Remote proc identifiers */ - xdrproc_t xdrargs, xdrres; /* XDR routines */ - caddr_t argsp, resp; /* Argument and Result */ - struct timeval tout; /* Timeout value for this call */ - const struct netbuf *addr_ptr; /* Preallocated netbuf address */ +rpcb_rmtcall(const struct netconfig *nconf, const char *host, rpcprog_t prog, + rpcvers_t vers, rpcproc_t proc, xdrproc_t xdrargs, caddr_t argsp, + xdrproc_t xdrres, caddr_t resp, struct timeval tout, + const struct netbuf *addr_ptr) { CLIENT *client; enum clnt_stat stat; struct r_rpcb_rmtcallargs a; struct r_rpcb_rmtcallres r; rpcvers_t rpcb_vers; stat = 0; client = getclnthandle(host, nconf, NULL); if (client == NULL) { return (RPC_FAILED); } /*LINTED const castaway*/ CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, (char *)(void *)&rmttimeout); a.prog = prog; a.vers = vers; a.proc = proc; a.args.args_val = argsp; a.xdr_args = xdrargs; r.addr = NULL; r.results.results_val = resp; r.xdr_res = xdrres; for (rpcb_vers = RPCBVERS4; rpcb_vers >= RPCBVERS; rpcb_vers--) { CLNT_CONTROL(client, CLSET_VERS, (char *)(void *)&rpcb_vers); stat = CLNT_CALL(client, (rpcproc_t)RPCBPROC_CALLIT, (xdrproc_t) xdr_rpcb_rmtcallargs, (char *)(void *)&a, (xdrproc_t) xdr_rpcb_rmtcallres, (char *)(void *)&r, tout); if ((stat == RPC_SUCCESS) && (addr_ptr != NULL)) { struct netbuf *na; /*LINTED const castaway*/ na = uaddr2taddr((struct netconfig *) nconf, r.addr); if (!na) { stat = RPC_N2AXLATEFAILURE; /*LINTED const castaway*/ ((struct netbuf *) addr_ptr)->len = 0; goto error; } if (na->len > addr_ptr->maxlen) { /* Too long address */ stat = RPC_FAILED; /* XXX A better error no */ free(na->buf); free(na); /*LINTED const castaway*/ ((struct netbuf *) addr_ptr)->len = 0; goto error; } memcpy(addr_ptr->buf, na->buf, (size_t)na->len); /*LINTED const castaway*/ ((struct netbuf *)addr_ptr)->len = na->len; free(na->buf); free(na); break; } else if ((stat != RPC_PROGVERSMISMATCH) && (stat != RPC_PROGUNAVAIL)) { goto error; } } error: CLNT_DESTROY(client); if (r.addr) xdr_free((xdrproc_t) xdr_wrapstring, (char *)(void *)&r.addr); return (stat); } /* * Gets the time on the remote host. * Returns 1 if it succeeds, else 0.
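 *
 * A minimal use (host name hypothetical); a NULL or empty host just
 * yields the local time:
 *
 *	time_t t;
 *	if (rpcb_gettime("somehost", &t))
 *		... t holds the remote host's time ...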
*/ bool_t rpcb_gettime(const char *host, time_t *timep) { CLIENT *client = NULL; void *handle; struct netconfig *nconf; rpcvers_t vers; enum clnt_stat st; if ((host == NULL) || (host[0] == 0)) { time(timep); return (TRUE); } if ((handle = __rpc_setconf("netpath")) == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNPROTO; return (FALSE); } rpc_createerr.cf_stat = RPC_SUCCESS; while (client == NULL) { if ((nconf = __rpc_getconf(handle)) == NULL) { if (rpc_createerr.cf_stat == RPC_SUCCESS) rpc_createerr.cf_stat = RPC_UNKNOWNPROTO; break; } client = getclnthandle(host, nconf, NULL); if (client) break; } __rpc_endconf(handle); if (client == (CLIENT *) NULL) { return (FALSE); } st = CLNT_CALL(client, (rpcproc_t)RPCBPROC_GETTIME, (xdrproc_t) xdr_void, NULL, (xdrproc_t) xdr_int, (char *)(void *)timep, tottimeout); if ((st == RPC_PROGVERSMISMATCH) || (st == RPC_PROGUNAVAIL)) { CLNT_CONTROL(client, CLGET_VERS, (char *)(void *)&vers); if (vers == RPCBVERS4) { /* fall back to earlier version */ vers = RPCBVERS; CLNT_CONTROL(client, CLSET_VERS, (char *)(void *)&vers); st = CLNT_CALL(client, (rpcproc_t)RPCBPROC_GETTIME, (xdrproc_t) xdr_void, NULL, (xdrproc_t) xdr_int, (char *)(void *)timep, tottimeout); } } CLNT_DESTROY(client); return (st == RPC_SUCCESS? TRUE: FALSE); } /* * Converts taddr to universal address. This routine should never * really be called because local n2a libraries are always provided. */ char * rpcb_taddr2uaddr(struct netconfig *nconf, struct netbuf *taddr) { CLIENT *client; char *uaddr = NULL; /* parameter checking */ if (nconf == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNPROTO; return (NULL); } if (taddr == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNADDR; return (NULL); } client = local_rpcb(); if (! client) { return (NULL); } CLNT_CALL(client, (rpcproc_t)RPCBPROC_TADDR2UADDR, (xdrproc_t) xdr_netbuf, (char *)(void *)taddr, (xdrproc_t) xdr_wrapstring, (char *)(void *)&uaddr, tottimeout); CLNT_DESTROY(client); return (uaddr); } /* * Converts universal address to netbuf. This routine should never * really be called because local n2a libraries are always provided. */ struct netbuf * rpcb_uaddr2taddr(struct netconfig *nconf, char *uaddr) { CLIENT *client; struct netbuf *taddr; /* parameter checking */ if (nconf == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNPROTO; return (NULL); } if (uaddr == NULL) { rpc_createerr.cf_stat = RPC_UNKNOWNADDR; return (NULL); } client = local_rpcb(); if (! client) { return (NULL); } taddr = (struct netbuf *)calloc(1, sizeof (struct netbuf)); if (taddr == NULL) { CLNT_DESTROY(client); return (NULL); } if (CLNT_CALL(client, (rpcproc_t)RPCBPROC_UADDR2TADDR, (xdrproc_t) xdr_wrapstring, (char *)(void *)&uaddr, (xdrproc_t) xdr_netbuf, (char *)(void *)taddr, tottimeout) != RPC_SUCCESS) { free(taddr); taddr = NULL; } CLNT_DESTROY(client); return (taddr); } Index: projects/clang370-import/lib/libc/rpc/rpcb_prot.c =================================================================== --- projects/clang370-import/lib/libc/rpc/rpcb_prot.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/rpcb_prot.c (revision 288126) @@ -1,331 +1,315 @@ /* $NetBSD: rpcb_prot.c,v 1.3 2000/07/14 08:40:42 fvdl Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
* - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1986-1991 by Sun Microsystems Inc. */ /* #ident "@(#)rpcb_prot.c 1.13 94/04/24 SMI" */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)rpcb_prot.c 1.9 89/04/21 Copyr 1984 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); /* * rpcb_prot.c * XDR routines for the rpcbinder version 3. * * Copyright (C) 1984, 1988, Sun Microsystems, Inc. */ #include "namespace.h" #include #include #include #include #include "un-namespace.h" bool_t -xdr_rpcb(xdrs, objp) - XDR *xdrs; - RPCB *objp; +xdr_rpcb(XDR *xdrs, RPCB *objp) { if (!xdr_u_int32_t(xdrs, &objp->r_prog)) { return (FALSE); } if (!xdr_u_int32_t(xdrs, &objp->r_vers)) { return (FALSE); } if (!xdr_string(xdrs, &objp->r_netid, (u_int)~0)) { return (FALSE); } if (!xdr_string(xdrs, &objp->r_addr, (u_int)~0)) { return (FALSE); } if (!xdr_string(xdrs, &objp->r_owner, (u_int)~0)) { return (FALSE); } return (TRUE); } /* * rpcblist_ptr implements a linked list. The RPCL definition from * rpcb_prot.x is: * * struct rpcblist { * rpcb rpcb_map; * struct rpcblist *rpcb_next; * }; * typedef rpcblist *rpcblist_ptr; * * Recall that "pointers" in XDR are encoded as a boolean, indicating whether * there's any data behind the pointer, followed by the data (if any exists). * The boolean can be interpreted as ``more data follows me''; if FALSE then * nothing follows the boolean; if TRUE then the boolean is followed by an * actual struct rpcb, and another rpcblist_ptr (declared in RPCL as "struct * rpcblist *"). * * This could be implemented via the xdr_pointer type, though this would * result in one recursive call per element in the list. Rather than do that * we can ``unwind'' the recursion into a while loop and use xdr_reference to * serialize the rpcb elements. */ bool_t -xdr_rpcblist_ptr(xdrs, rp) - XDR *xdrs; - rpcblist_ptr *rp; +xdr_rpcblist_ptr(XDR *xdrs, rpcblist_ptr *rp) { /* * more_elements is pre-computed in case the direction is * XDR_ENCODE or XDR_FREE. more_elements is overwritten by * xdr_bool when the direction is XDR_DECODE. */ bool_t more_elements; int freeing = (xdrs->x_op == XDR_FREE); rpcblist_ptr next; rpcblist_ptr next_copy; next = NULL; for (;;) { more_elements = (bool_t)(*rp != NULL); if (! xdr_bool(xdrs, &more_elements)) { return (FALSE); } if (! 
more_elements) { return (TRUE); /* we are done */ } /* * the unfortunate side effect of non-recursion is that in * the case of freeing we must remember the next object * before we free the current object ... */ if (freeing && *rp) next = (*rp)->rpcb_next; if (! xdr_reference(xdrs, (caddr_t *)rp, (u_int)sizeof (rpcblist), (xdrproc_t)xdr_rpcb)) { return (FALSE); } if (freeing) { next_copy = next; rp = &next_copy; /* * Note that in the subsequent iteration, next_copy * gets nulled out by the xdr_reference * but next itself survives. */ } else if (*rp) { rp = &((*rp)->rpcb_next); } } /*NOTREACHED*/ } /* * xdr_rpcblist() is specified to take a RPCBLIST **, but is identical in * functionality to xdr_rpcblist_ptr(). */ bool_t -xdr_rpcblist(xdrs, rp) - XDR *xdrs; - RPCBLIST **rp; +xdr_rpcblist(XDR *xdrs, RPCBLIST **rp) { bool_t dummy; dummy = xdr_rpcblist_ptr(xdrs, (rpcblist_ptr *)rp); return (dummy); } bool_t -xdr_rpcb_entry(xdrs, objp) - XDR *xdrs; - rpcb_entry *objp; +xdr_rpcb_entry(XDR *xdrs, rpcb_entry *objp) { if (!xdr_string(xdrs, &objp->r_maddr, (u_int)~0)) { return (FALSE); } if (!xdr_string(xdrs, &objp->r_nc_netid, (u_int)~0)) { return (FALSE); } if (!xdr_u_int32_t(xdrs, &objp->r_nc_semantics)) { return (FALSE); } if (!xdr_string(xdrs, &objp->r_nc_protofmly, (u_int)~0)) { return (FALSE); } if (!xdr_string(xdrs, &objp->r_nc_proto, (u_int)~0)) { return (FALSE); } return (TRUE); } bool_t -xdr_rpcb_entry_list_ptr(xdrs, rp) - XDR *xdrs; - rpcb_entry_list_ptr *rp; +xdr_rpcb_entry_list_ptr(XDR *xdrs, rpcb_entry_list_ptr *rp) { /* * more_elements is pre-computed in case the direction is * XDR_ENCODE or XDR_FREE. more_elements is overwritten by * xdr_bool when the direction is XDR_DECODE. */ bool_t more_elements; int freeing = (xdrs->x_op == XDR_FREE); rpcb_entry_list_ptr next; rpcb_entry_list_ptr next_copy; next = NULL; for (;;) { more_elements = (bool_t)(*rp != NULL); if (! xdr_bool(xdrs, &more_elements)) { return (FALSE); } if (! more_elements) { return (TRUE); /* we are done */ } /* * the unfortunate side effect of non-recursion is that in * the case of freeing we must remember the next object * before we free the current object ... */ if (freeing) next = (*rp)->rpcb_entry_next; if (! xdr_reference(xdrs, (caddr_t *)rp, (u_int)sizeof (rpcb_entry_list), (xdrproc_t)xdr_rpcb_entry)) { return (FALSE); } if (freeing && *rp) { next_copy = next; rp = &next_copy; /* * Note that in the subsequent iteration, next_copy * gets nulled out by the xdr_reference * but next itself survives. */ } else if (*rp) { rp = &((*rp)->rpcb_entry_next); } } /*NOTREACHED*/ } /* * XDR remote call arguments * written for XDR_ENCODE direction only */ bool_t -xdr_rpcb_rmtcallargs(xdrs, p) - XDR *xdrs; - struct rpcb_rmtcallargs *p; +xdr_rpcb_rmtcallargs(XDR *xdrs, struct rpcb_rmtcallargs *p) { struct r_rpcb_rmtcallargs *objp = (struct r_rpcb_rmtcallargs *)(void *)p; u_int lenposition, argposition, position; int32_t *buf; buf = XDR_INLINE(xdrs, 3 * BYTES_PER_XDR_UNIT); if (buf == NULL) { if (!xdr_u_int32_t(xdrs, &objp->prog)) { return (FALSE); } if (!xdr_u_int32_t(xdrs, &objp->vers)) { return (FALSE); } if (!xdr_u_int32_t(xdrs, &objp->proc)) { return (FALSE); } } else { IXDR_PUT_U_INT32(buf, objp->prog); IXDR_PUT_U_INT32(buf, objp->vers); IXDR_PUT_U_INT32(buf, objp->proc); } /* * All the jugglery for just getting the size of the arguments */ lenposition = XDR_GETPOS(xdrs); if (! xdr_u_int(xdrs, &(objp->args.args_len))) { return (FALSE); } argposition = XDR_GETPOS(xdrs); if (! 
(*objp->xdr_args)(xdrs, objp->args.args_val)) { return (FALSE); } position = XDR_GETPOS(xdrs); objp->args.args_len = (u_int)((u_long)position - (u_long)argposition); XDR_SETPOS(xdrs, lenposition); if (! xdr_u_int(xdrs, &(objp->args.args_len))) { return (FALSE); } XDR_SETPOS(xdrs, position); return (TRUE); } /* * XDR remote call results * written for XDR_DECODE direction only */ bool_t -xdr_rpcb_rmtcallres(xdrs, p) - XDR *xdrs; - struct rpcb_rmtcallres *p; +xdr_rpcb_rmtcallres(XDR *xdrs, struct rpcb_rmtcallres *p) { bool_t dummy; struct r_rpcb_rmtcallres *objp = (struct r_rpcb_rmtcallres *)(void *)p; if (!xdr_string(xdrs, &objp->addr, (u_int)~0)) { return (FALSE); } if (!xdr_u_int(xdrs, &objp->results.results_len)) { return (FALSE); } dummy = (*(objp->xdr_res))(xdrs, objp->results.results_val); return (dummy); } bool_t -xdr_netbuf(xdrs, objp) - XDR *xdrs; - struct netbuf *objp; +xdr_netbuf(XDR *xdrs, struct netbuf *objp) { bool_t dummy; void **pp; if (!xdr_u_int32_t(xdrs, (u_int32_t *) &objp->maxlen)) { return (FALSE); } pp = &objp->buf; dummy = xdr_bytes(xdrs, (char **) pp, (u_int *)&(objp->len), objp->maxlen); return (dummy); } Index: projects/clang370-import/lib/libc/rpc/rpcb_st_xdr.c =================================================================== --- projects/clang370-import/lib/libc/rpc/rpcb_st_xdr.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/rpcb_st_xdr.c (revision 288126) @@ -1,274 +1,260 @@ /* $NetBSD: rpcb_st_xdr.c,v 1.3 2000/07/14 08:40:42 fvdl Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright 1991 Sun Microsystems, Inc. * rpcb_stat_xdr.c */ /* * This file was generated from rpcb_prot.x, but includes only those * routines used with the rpcbind stats facility. 
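 *
 * Concretely, those are the statistics rpcbind keeps per version:
 * address lookups (rpcbs_addrlist), remote calls (rpcbs_rmtcalllist),
 * the per-procedure counters (rpcbs_proc), and the rpcb_stat
 * structure that ties them together.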
*/ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include "un-namespace.h" /* Link list of all the stats about getport and getaddr */ bool_t -xdr_rpcbs_addrlist(xdrs, objp) - XDR *xdrs; - rpcbs_addrlist *objp; +xdr_rpcbs_addrlist(XDR *xdrs, rpcbs_addrlist *objp) { struct rpcbs_addrlist **pnext; if (!xdr_u_int32_t(xdrs, &objp->prog)) { return (FALSE); } if (!xdr_u_int32_t(xdrs, &objp->vers)) { return (FALSE); } if (!xdr_int(xdrs, &objp->success)) { return (FALSE); } if (!xdr_int(xdrs, &objp->failure)) { return (FALSE); } if (!xdr_string(xdrs, &objp->netid, (u_int)~0)) { return (FALSE); } pnext = &objp->next; if (!xdr_pointer(xdrs, (char **) pnext, sizeof (rpcbs_addrlist), (xdrproc_t)xdr_rpcbs_addrlist)) { return (FALSE); } return (TRUE); } /* Link list of all the stats about rmtcall */ bool_t -xdr_rpcbs_rmtcalllist(xdrs, objp) - XDR *xdrs; - rpcbs_rmtcalllist *objp; +xdr_rpcbs_rmtcalllist(XDR *xdrs, rpcbs_rmtcalllist *objp) { int32_t *buf; struct rpcbs_rmtcalllist **pnext; if (xdrs->x_op == XDR_ENCODE) { buf = XDR_INLINE(xdrs, 6 * BYTES_PER_XDR_UNIT); if (buf == NULL) { if (!xdr_u_int32_t(xdrs, &objp->prog)) { return (FALSE); } if (!xdr_u_int32_t(xdrs, &objp->vers)) { return (FALSE); } if (!xdr_u_int32_t(xdrs, &objp->proc)) { return (FALSE); } if (!xdr_int(xdrs, &objp->success)) { return (FALSE); } if (!xdr_int(xdrs, &objp->failure)) { return (FALSE); } if (!xdr_int(xdrs, &objp->indirect)) { return (FALSE); } } else { IXDR_PUT_U_INT32(buf, objp->prog); IXDR_PUT_U_INT32(buf, objp->vers); IXDR_PUT_U_INT32(buf, objp->proc); IXDR_PUT_INT32(buf, objp->success); IXDR_PUT_INT32(buf, objp->failure); IXDR_PUT_INT32(buf, objp->indirect); } if (!xdr_string(xdrs, &objp->netid, (u_int)~0)) { return (FALSE); } pnext = &objp->next; if (!xdr_pointer(xdrs, (char **) pnext, sizeof (rpcbs_rmtcalllist), (xdrproc_t)xdr_rpcbs_rmtcalllist)) { return (FALSE); } return (TRUE); } else if (xdrs->x_op == XDR_DECODE) { buf = XDR_INLINE(xdrs, 6 * BYTES_PER_XDR_UNIT); if (buf == NULL) { if (!xdr_u_int32_t(xdrs, &objp->prog)) { return (FALSE); } if (!xdr_u_int32_t(xdrs, &objp->vers)) { return (FALSE); } if (!xdr_u_int32_t(xdrs, &objp->proc)) { return (FALSE); } if (!xdr_int(xdrs, &objp->success)) { return (FALSE); } if (!xdr_int(xdrs, &objp->failure)) { return (FALSE); } if (!xdr_int(xdrs, &objp->indirect)) { return (FALSE); } } else { objp->prog = (rpcprog_t)IXDR_GET_U_INT32(buf); objp->vers = (rpcvers_t)IXDR_GET_U_INT32(buf); objp->proc = (rpcproc_t)IXDR_GET_U_INT32(buf); objp->success = (int)IXDR_GET_INT32(buf); objp->failure = (int)IXDR_GET_INT32(buf); objp->indirect = (int)IXDR_GET_INT32(buf); } if (!xdr_string(xdrs, &objp->netid, (u_int)~0)) { return (FALSE); } if (!xdr_pointer(xdrs, (char **) pnext, sizeof (rpcbs_rmtcalllist), (xdrproc_t)xdr_rpcbs_rmtcalllist)) { return (FALSE); } return (TRUE); } if (!xdr_u_int32_t(xdrs, &objp->prog)) { return (FALSE); } if (!xdr_u_int32_t(xdrs, &objp->vers)) { return (FALSE); } if (!xdr_u_int32_t(xdrs, &objp->proc)) { return (FALSE); } if (!xdr_int(xdrs, &objp->success)) { return (FALSE); } if (!xdr_int(xdrs, &objp->failure)) { return (FALSE); } if (!xdr_int(xdrs, &objp->indirect)) { return (FALSE); } if (!xdr_string(xdrs, &objp->netid, (u_int)~0)) { return (FALSE); } if (!xdr_pointer(xdrs, (char **) pnext, sizeof (rpcbs_rmtcalllist), (xdrproc_t)xdr_rpcbs_rmtcalllist)) { return (FALSE); } return (TRUE); } bool_t -xdr_rpcbs_proc(xdrs, objp) - XDR *xdrs; - rpcbs_proc objp; +xdr_rpcbs_proc(XDR *xdrs, rpcbs_proc objp) { if (!xdr_vector(xdrs, (char *)(void *)objp, 
RPCBSTAT_HIGHPROC, sizeof (int), (xdrproc_t)xdr_int)) { return (FALSE); } return (TRUE); } bool_t -xdr_rpcbs_addrlist_ptr(xdrs, objp) - XDR *xdrs; - rpcbs_addrlist_ptr *objp; +xdr_rpcbs_addrlist_ptr(XDR *xdrs, rpcbs_addrlist_ptr *objp) { if (!xdr_pointer(xdrs, (char **)objp, sizeof (rpcbs_addrlist), (xdrproc_t)xdr_rpcbs_addrlist)) { return (FALSE); } return (TRUE); } bool_t -xdr_rpcbs_rmtcalllist_ptr(xdrs, objp) - XDR *xdrs; - rpcbs_rmtcalllist_ptr *objp; +xdr_rpcbs_rmtcalllist_ptr(XDR *xdrs, rpcbs_rmtcalllist_ptr *objp) { if (!xdr_pointer(xdrs, (char **)objp, sizeof (rpcbs_rmtcalllist), (xdrproc_t)xdr_rpcbs_rmtcalllist)) { return (FALSE); } return (TRUE); } bool_t -xdr_rpcb_stat(xdrs, objp) - XDR *xdrs; - rpcb_stat *objp; +xdr_rpcb_stat(XDR *xdrs, rpcb_stat *objp) { if (!xdr_rpcbs_proc(xdrs, objp->info)) { return (FALSE); } if (!xdr_int(xdrs, &objp->setinfo)) { return (FALSE); } if (!xdr_int(xdrs, &objp->unsetinfo)) { return (FALSE); } if (!xdr_rpcbs_addrlist_ptr(xdrs, &objp->addrinfo)) { return (FALSE); } if (!xdr_rpcbs_rmtcalllist_ptr(xdrs, &objp->rmtinfo)) { return (FALSE); } return (TRUE); } /* * One rpcb_stat structure is returned for each version of rpcbind * being monitored. */ bool_t -xdr_rpcb_stat_byvers(xdrs, objp) - XDR *xdrs; - rpcb_stat_byvers objp; +xdr_rpcb_stat_byvers(XDR *xdrs, rpcb_stat_byvers objp) { if (!xdr_vector(xdrs, (char *)(void *)objp, RPCBVERS_STAT, sizeof (rpcb_stat), (xdrproc_t)xdr_rpcb_stat)) { return (FALSE); } return (TRUE); } Index: projects/clang370-import/lib/libc/rpc/svc_auth.c =================================================================== --- projects/clang370-import/lib/libc/rpc/svc_auth.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/svc_auth.c (revision 288126) @@ -1,226 +1,224 @@ /* $NetBSD: svc_auth.c,v 1.12 2000/07/06 03:10:35 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1986-1991 by Sun Microsystems Inc. 
*/ #if defined(LIBC_SCCS) && !defined(lint) #ident "@(#)svc_auth.c 1.16 94/04/24 SMI" static char sccsid[] = "@(#)svc_auth.c 1.26 89/02/07 Copyr 1984 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); /* * svc_auth.c, Server-side rpc authenticator interface. * */ #include "namespace.h" #include "reentrant.h" #include #include #include #include "un-namespace.h" #include "mt_misc.h" /* * svcauthsw is the bdevsw of server side authentication. * * Server side authenticators are called from authenticate by * using the client auth struct flavor field to index into svcauthsw. * The server auth flavors must implement a routine that looks * like: * * enum auth_stat * flavorx_auth(rqst, msg) * struct svc_req *rqst; * struct rpc_msg *msg; * */ /* declarations to allow servers to specify new authentication flavors */ struct authsvc { int flavor; enum auth_stat (*handler)(struct svc_req *, struct rpc_msg *); struct authsvc *next; }; static struct authsvc *Auths = NULL; struct svc_auth_ops svc_auth_null_ops; /* * The call rpc message, msg, has been obtained from the wire. The msg contains * the raw form of credentials and verifiers. authenticate returns AUTH_OK * if the msg is successfully authenticated. If AUTH_OK then the routine also * does the following things: * sets rqst->rq_xprt->verf to the appropriate response verifier; * sets rqst->rq_client_cred to the "cooked" form of the credentials. * * NB: rqst->rq_xprt->verf must be pre-allocated; * its length is set appropriately. * * The caller still owns and is responsible for msg->u.cmb.cred and * msg->u.cmb.verf. The authentication system retains ownership of * rqst->rq_client_cred, the cooked credentials. * * There is an assumption that any flavor less than AUTH_NULL is * invalid. */ enum auth_stat -_authenticate(rqst, msg) - struct svc_req *rqst; - struct rpc_msg *msg; +_authenticate(struct svc_req *rqst, struct rpc_msg *msg) { int cred_flavor; struct authsvc *asp; enum auth_stat dummy; /* VARIABLES PROTECTED BY authsvc_lock: asp, Auths */ rqst->rq_cred = msg->rm_call.cb_cred; SVC_AUTH(rqst->rq_xprt).svc_ah_ops = &svc_auth_null_ops; SVC_AUTH(rqst->rq_xprt).svc_ah_private = NULL; rqst->rq_xprt->xp_verf.oa_flavor = _null_auth.oa_flavor; rqst->rq_xprt->xp_verf.oa_length = 0; cred_flavor = rqst->rq_cred.oa_flavor; switch (cred_flavor) { case AUTH_NULL: dummy = _svcauth_null(rqst, msg); return (dummy); case AUTH_SYS: dummy = _svcauth_unix(rqst, msg); return (dummy); case AUTH_SHORT: dummy = _svcauth_short(rqst, msg); return (dummy); #ifdef DES_BUILTIN case AUTH_DES: dummy = _svcauth_des(rqst, msg); return (dummy); #endif default: break; } /* flavor doesn't match any of the builtin types, so try new ones */ mutex_lock(&authsvc_lock); for (asp = Auths; asp; asp = asp->next) { if (asp->flavor == cred_flavor) { enum auth_stat as; as = (*asp->handler)(rqst, msg); mutex_unlock(&authsvc_lock); return (as); } } mutex_unlock(&authsvc_lock); return (AUTH_REJECTEDCRED); } /* * A set of null auth methods used by any authentication protocols * that don't need to inspect or modify the message body. */ static bool_t svcauth_null_wrap(SVCAUTH *auth, XDR *xdrs, xdrproc_t xdr_func, caddr_t xdr_ptr) { return (xdr_func(xdrs, xdr_ptr)); } struct svc_auth_ops svc_auth_null_ops = { svcauth_null_wrap, svcauth_null_wrap, }; /*ARGSUSED*/ enum auth_stat _svcauth_null(struct svc_req *rqst, struct rpc_msg *msg) { return (AUTH_OK); } /* * Allow the rpc service to register new authentication types that it is * prepared to handle.
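 * As a sketch (flavor constant and handler name hypothetical), a
 * server would make a one-time call such as
 *
 *	svc_auth_reg(AUTH_MYFLAVOR, myflavor_auth);
 *
 * before servicing requests.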
When an authentication flavor is registered, * the flavor is checked against already registered values. If not * registered, then a new Auths entry is added on the list. * * There is no provision to delete a registration once registered. * * This routine returns: * 0 if registration successful * 1 if flavor already registered * -1 if can't register (errno set) */ int svc_auth_reg(int cred_flavor, enum auth_stat (*handler)(struct svc_req *, struct rpc_msg *)) { struct authsvc *asp; switch (cred_flavor) { case AUTH_NULL: case AUTH_SYS: case AUTH_SHORT: #ifdef DES_BUILTIN case AUTH_DES: #endif /* already registered */ return (1); default: mutex_lock(&authsvc_lock); for (asp = Auths; asp; asp = asp->next) { if (asp->flavor == cred_flavor) { /* already registered */ mutex_unlock(&authsvc_lock); return (1); } } /* this is a new one, so go ahead and register it */ asp = mem_alloc(sizeof (*asp)); if (asp == NULL) { mutex_unlock(&authsvc_lock); return (-1); } asp->flavor = cred_flavor; asp->handler = handler; asp->next = Auths; Auths = asp; mutex_unlock(&authsvc_lock); break; } return (0); } Index: projects/clang370-import/lib/libc/rpc/svc_auth_unix.c =================================================================== --- projects/clang370-import/lib/libc/rpc/svc_auth_unix.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/svc_auth_unix.c (revision 288126) @@ -1,155 +1,151 @@ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)svc_auth_unix.c 1.28 88/02/08 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)svc_auth_unix.c 2.3 88/08/01 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * svc_auth_unix.c * Handles UNIX flavor authentication parameters on the service side of rpc. * There are two svc auth implementations here: AUTH_UNIX and AUTH_SHORT. * _svcauth_unix does full blown unix style uid,gid+gids auth, * _svcauth_short uses a shorthand auth to index into a cache of longhand auths. * Note: the shorthand has been gutted for efficiency. 
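 *
 * The longhand credential decoded below carries, in XDR order: a
 * timestamp, the machine name (at most MAX_MACHINE_NAME bytes), the
 * uid, the gid, and up to NGRPS supplementary gids.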
* * Copyright (C) 1984, Sun Microsystems, Inc. */ #include "namespace.h" #include #include #include #include #include "un-namespace.h" /* * Unix longhand authenticator */ enum auth_stat -_svcauth_unix(rqst, msg) - struct svc_req *rqst; - struct rpc_msg *msg; +_svcauth_unix(struct svc_req *rqst, struct rpc_msg *msg) { enum auth_stat stat; XDR xdrs; struct authunix_parms *aup; int32_t *buf; struct area { struct authunix_parms area_aup; char area_machname[MAX_MACHINE_NAME+1]; u_int area_gids[NGRPS]; } *area; u_int auth_len; size_t str_len, gid_len; u_int i; assert(rqst != NULL); assert(msg != NULL); area = (struct area *) rqst->rq_clntcred; aup = &area->area_aup; aup->aup_machname = area->area_machname; aup->aup_gids = area->area_gids; auth_len = (u_int)msg->rm_call.cb_cred.oa_length; xdrmem_create(&xdrs, msg->rm_call.cb_cred.oa_base, auth_len,XDR_DECODE); buf = XDR_INLINE(&xdrs, auth_len); if (buf != NULL) { aup->aup_time = IXDR_GET_INT32(buf); str_len = (size_t)IXDR_GET_U_INT32(buf); if (str_len > MAX_MACHINE_NAME) { stat = AUTH_BADCRED; goto done; } memmove(aup->aup_machname, buf, str_len); aup->aup_machname[str_len] = 0; str_len = RNDUP(str_len); buf += str_len / sizeof (int32_t); aup->aup_uid = (int)IXDR_GET_INT32(buf); aup->aup_gid = (int)IXDR_GET_INT32(buf); gid_len = (size_t)IXDR_GET_U_INT32(buf); if (gid_len > NGRPS) { stat = AUTH_BADCRED; goto done; } aup->aup_len = gid_len; for (i = 0; i < gid_len; i++) { aup->aup_gids[i] = (int)IXDR_GET_INT32(buf); } /* * five is the smallest unix credentials structure - * timestamp, hostname len (0), uid, gid, and gids len (0). */ if ((5 + gid_len) * BYTES_PER_XDR_UNIT + str_len > auth_len) { (void) printf("bad auth_len gid %ld str %ld auth %u\n", (long)gid_len, (long)str_len, auth_len); stat = AUTH_BADCRED; goto done; } } else if (! xdr_authunix_parms(&xdrs, aup)) { xdrs.x_op = XDR_FREE; (void)xdr_authunix_parms(&xdrs, aup); stat = AUTH_BADCRED; goto done; } /* get the verifier */ if ((u_int)msg->rm_call.cb_verf.oa_length) { rqst->rq_xprt->xp_verf.oa_flavor = msg->rm_call.cb_verf.oa_flavor; rqst->rq_xprt->xp_verf.oa_base = msg->rm_call.cb_verf.oa_base; rqst->rq_xprt->xp_verf.oa_length = msg->rm_call.cb_verf.oa_length; } else { rqst->rq_xprt->xp_verf.oa_flavor = AUTH_NULL; rqst->rq_xprt->xp_verf.oa_length = 0; } stat = AUTH_OK; done: XDR_DESTROY(&xdrs); return (stat); } /* * Shorthand unix authenticator * Looks up longhand in a cache. */ /*ARGSUSED*/ enum auth_stat -_svcauth_short(rqst, msg) - struct svc_req *rqst; - struct rpc_msg *msg; +_svcauth_short(struct svc_req *rqst, struct rpc_msg *msg) { return (AUTH_REJECTEDCRED); } Index: projects/clang370-import/lib/libc/rpc/svc_generic.c =================================================================== --- projects/clang370-import/lib/libc/rpc/svc_generic.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/svc_generic.c (revision 288126) @@ -1,310 +1,314 @@ /* $NetBSD: svc_generic.c,v 1.3 2000/07/06 03:10:35 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. 
* - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1986-1991 by Sun Microsystems Inc. */ #if defined(LIBC_SCCS) && !defined(lint) #ident "@(#)svc_generic.c 1.19 94/04/24 SMI" static char sccsid[] = "@(#)svc_generic.c 1.21 89/02/28 Copyr 1988 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); /* * svc_generic.c, Server side for RPC. * */ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "rpc_com.h" #include "mt_misc.h" extern int __svc_vc_setflag(SVCXPRT *, int); /* * The highest level interface for server creation. * It tries for all the nettokens in that particular class of token * and returns the number of handles it can create and/or find. * * It creates a link list of all the handles it could create. * If svc_create() is called multiple times, it uses the handle * created earlier instead of creating a new handle every time. + * + * prognum - Program number + * versnum - Version number + * nettype - Networktype token */ int -svc_create(dispatch, prognum, versnum, nettype) - void (*dispatch)(struct svc_req *, SVCXPRT *); - rpcprog_t prognum; /* Program number */ - rpcvers_t versnum; /* Version number */ - const char *nettype; /* Networktype token */ +svc_create(void (*dispatch)(struct svc_req *, SVCXPRT *), + rpcprog_t prognum, rpcvers_t versnum, const char *nettype) { struct xlist { SVCXPRT *xprt; /* Server handle */ struct xlist *next; /* Next item */ } *l; static struct xlist *xprtlist; /* A link list of all the handles */ int num = 0; SVCXPRT *xprt; struct netconfig *nconf; void *handle; /* VARIABLES PROTECTED BY xprtlist_lock: xprtlist */ if ((handle = __rpc_setconf(nettype)) == NULL) { warnx("svc_create: unknown protocol"); return (0); } while ((nconf = __rpc_getconf(handle)) != NULL) { mutex_lock(&xprtlist_lock); for (l = xprtlist; l; l = l->next) { if (strcmp(l->xprt->xp_netid, nconf->nc_netid) == 0) { /* Found an old one, use it */ (void) rpcb_unset(prognum, versnum, nconf); if (svc_reg(l->xprt, prognum, versnum, dispatch, nconf) == FALSE) warnx( "svc_create: could not register prog %u vers %u on %s", (unsigned)prognum, (unsigned)versnum, nconf->nc_netid); else num++; break; } } if (l == NULL) { /* It was not found. 
Now create a new one */ xprt = svc_tp_create(dispatch, prognum, versnum, nconf); if (xprt) { l = (struct xlist *)malloc(sizeof (*l)); if (l == NULL) { warnx("svc_create: no memory"); mutex_unlock(&xprtlist_lock); return (0); } l->xprt = xprt; l->next = xprtlist; xprtlist = l; num++; } } mutex_unlock(&xprtlist_lock); } __rpc_endconf(handle); /* * In case of num == 0; the error messages are generated by the * underlying layers; and hence not needed here. */ return (num); } /* * The high level interface to svc_tli_create(). * It tries to create a server for "nconf" and registers the service * with the rpcbind. It calls svc_tli_create(). + * + * prognum - Program number + * versnum - Version number + * nconf - Netconfig structure for the network */ SVCXPRT * -svc_tp_create(dispatch, prognum, versnum, nconf) - void (*dispatch)(struct svc_req *, SVCXPRT *); - rpcprog_t prognum; /* Program number */ - rpcvers_t versnum; /* Version number */ - const struct netconfig *nconf; /* Netconfig structure for the network */ +svc_tp_create(void (*dispatch)(struct svc_req *, SVCXPRT *), + rpcprog_t prognum, rpcvers_t versnum, const struct netconfig *nconf) { SVCXPRT *xprt; if (nconf == NULL) { warnx( "svc_tp_create: invalid netconfig structure for prog %u vers %u", (unsigned)prognum, (unsigned)versnum); return (NULL); } xprt = svc_tli_create(RPC_ANYFD, nconf, NULL, 0, 0); if (xprt == NULL) { return (NULL); } /*LINTED const castaway*/ (void) rpcb_unset(prognum, versnum, (struct netconfig *) nconf); if (svc_reg(xprt, prognum, versnum, dispatch, nconf) == FALSE) { warnx( "svc_tp_create: Could not register prog %u vers %u on %s", (unsigned)prognum, (unsigned)versnum, nconf->nc_netid); SVC_DESTROY(xprt); return (NULL); } return (xprt); } /* * If fd is RPC_ANYFD, then it opens a fd for the given transport * provider (nconf cannot be NULL then). If the t_state is T_UNBND and * bindaddr is NON-NULL, it performs a t_bind using the bindaddr. For * NULL bindaddr and Connection oriented transports, the value of qlen * is set to 8. * * If sendsz or recvsz are zero, their default values are chosen. + * + * fd - Connection end point + * nconf - Netconfig struct for nettoken + * bindaddr - Local bind address + * sendsz - Max sendsize + * recvsz - Max recvsize */ SVCXPRT * -svc_tli_create(fd, nconf, bindaddr, sendsz, recvsz) - int fd; /* Connection end point */ - const struct netconfig *nconf; /* Netconfig struct for nettoken */ - const struct t_bind *bindaddr; /* Local bind address */ - u_int sendsz; /* Max sendsize */ - u_int recvsz; /* Max recvsize */ +svc_tli_create(int fd, const struct netconfig *nconf, + const struct t_bind *bindaddr, u_int sendsz, u_int recvsz) { SVCXPRT *xprt = NULL; /* service handle */ bool_t madefd = FALSE; /* whether fd opened here */ struct __rpc_sockinfo si; struct sockaddr_storage ss; socklen_t slen; if (fd == RPC_ANYFD) { if (nconf == NULL) { warnx("svc_tli_create: invalid netconfig"); return (NULL); } fd = __rpc_nconf2fd(nconf); if (fd == -1) { warnx( "svc_tli_create: could not open connection for %s", nconf->nc_netid); return (NULL); } __rpc_nconf2sockinfo(nconf, &si); madefd = TRUE; } else { /* * It is an open descriptor. Get the transport info. */ if (!__rpc_fd2sockinfo(fd, &si)) { warnx( "svc_tli_create: could not get transport information"); return (NULL); } } /* * If the fd is unbound, try to bind it.
*/ if (madefd || !__rpc_sockisbound(fd)) { if (bindaddr == NULL) { if (bindresvport(fd, NULL) < 0) { memset(&ss, 0, sizeof ss); ss.ss_family = si.si_af; ss.ss_len = si.si_alen; if (_bind(fd, (struct sockaddr *)(void *)&ss, (socklen_t)si.si_alen) < 0) { warnx( "svc_tli_create: could not bind to anonymous port"); goto freedata; } } _listen(fd, SOMAXCONN); } else { if (_bind(fd, (struct sockaddr *)bindaddr->addr.buf, (socklen_t)si.si_alen) < 0) { warnx( "svc_tli_create: could not bind to requested address"); goto freedata; } _listen(fd, (int)bindaddr->qlen); } } /* * call transport specific function. */ switch (si.si_socktype) { case SOCK_STREAM: slen = sizeof ss; if (_getpeername(fd, (struct sockaddr *)(void *)&ss, &slen) == 0) { /* accepted socket */ xprt = svc_fd_create(fd, sendsz, recvsz); } else xprt = svc_vc_create(fd, sendsz, recvsz); if (!nconf || !xprt) break; #if 0 /* XXX fvdl */ if (strcmp(nconf->nc_protofmly, "inet") == 0 || strcmp(nconf->nc_protofmly, "inet6") == 0) (void) __svc_vc_setflag(xprt, TRUE); #endif break; case SOCK_DGRAM: xprt = svc_dg_create(fd, sendsz, recvsz); break; default: warnx("svc_tli_create: bad service type"); goto freedata; } if (xprt == NULL) /* * The error messages here are emitted by the lower layers: * svc_vc_create(), svc_fd_create() and svc_dg_create(). */ goto freedata; /* Fill in type of service */ xprt->xp_type = __rpc_socktype2seman(si.si_socktype); if (nconf) { xprt->xp_netid = strdup(nconf->nc_netid); xprt->xp_tp = strdup(nconf->nc_device); } return (xprt); freedata: if (madefd) (void)_close(fd); if (xprt) { if (!madefd) /* so that svc_destroy doesn't close fd */ xprt->xp_fd = RPC_ANYFD; SVC_DESTROY(xprt); } return (NULL); } Index: projects/clang370-import/lib/libc/rpc/svc_raw.c =================================================================== --- projects/clang370-import/lib/libc/rpc/svc_raw.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/svc_raw.c (revision 288126) @@ -1,285 +1,269 @@ /* $NetBSD: svc_raw.c,v 1.14 2000/07/06 03:10:35 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
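For orientation, svc_create(), svc_tp_create() and svc_tli_create() above compose into the usual server bring-up. A minimal sketch, assuming a hypothetical program number in the user-defined range and a trivial dispatch routine; none of this is part of the commit:

#include <rpc/rpc.h>
#include <err.h>

#define EXAMPLE_PROG ((rpcprog_t)0x20000042)	/* hypothetical program number */
#define EXAMPLE_VERS ((rpcvers_t)1)

static void
example_dispatch(struct svc_req *rqstp, SVCXPRT *xprt)
{
	/* Answer the conventional NULLPROC ping; reject everything else. */
	if (rqstp->rq_proc == NULLPROC)
		(void)svc_sendreply(xprt, (xdrproc_t)xdr_void, NULL);
	else
		svcerr_noproc(xprt);
}

int
main(void)
{
	/* One handle per usable transport in the "netpath" class. */
	if (svc_create(example_dispatch, EXAMPLE_PROG, EXAMPLE_VERS,
	    "netpath") == 0)
		errx(1, "svc_create created no transports");
	svc_run();	/* returns only after svc_exit() */
	return (0);
}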
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1986-1991 by Sun Microsystems Inc. */ /* #ident "@(#)svc_raw.c 1.16 94/04/24 SMI" */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)svc_raw.c 1.25 89/01/31 Copyr 1984 Sun Micro"; #endif #include __FBSDID("$FreeBSD$"); /* * svc_raw.c, this is a toy for simple testing and timing. * Interface to create an rpc client and server in the same UNIX process. * This lets us simulate rpc and get rpc (round trip) overhead, without * any interference from the kernel. * */ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include "un-namespace.h" #include "mt_misc.h" #ifndef UDPMSGSIZE #define UDPMSGSIZE 8800 #endif /* * This is the "network" that we will be moving data over */ static struct svc_raw_private { char *raw_buf; /* should be shared with the cl handle */ SVCXPRT *server; XDR xdr_stream; char verf_body[MAX_AUTH_BYTES]; } *svc_raw_private; static enum xprt_stat svc_raw_stat(SVCXPRT *); static bool_t svc_raw_recv(SVCXPRT *, struct rpc_msg *); static bool_t svc_raw_reply(SVCXPRT *, struct rpc_msg *); static bool_t svc_raw_getargs(SVCXPRT *, xdrproc_t, void *); static bool_t svc_raw_freeargs(SVCXPRT *, xdrproc_t, void *); static void svc_raw_destroy(SVCXPRT *); static void svc_raw_ops(SVCXPRT *); static bool_t svc_raw_control(SVCXPRT *, const u_int, void *); char *__rpc_rawcombuf = NULL; SVCXPRT * -svc_raw_create() +svc_raw_create(void) { struct svc_raw_private *srp; /* VARIABLES PROTECTED BY svcraw_lock: svc_raw_private, srp */ mutex_lock(&svcraw_lock); srp = svc_raw_private; if (srp == NULL) { srp = (struct svc_raw_private *)calloc(1, sizeof (*srp)); if (srp == NULL) { mutex_unlock(&svcraw_lock); return (NULL); } if (__rpc_rawcombuf == NULL) { __rpc_rawcombuf = calloc(UDPMSGSIZE, sizeof (char)); if (__rpc_rawcombuf == NULL) { free(srp); mutex_unlock(&svcraw_lock); return (NULL); } } srp->raw_buf = __rpc_rawcombuf; /* Share it with the client */ srp->server = svc_xprt_alloc(); if (srp->server == NULL) { free(__rpc_rawcombuf); free(srp); mutex_unlock(&svcraw_lock); return (NULL); } svc_raw_private = srp; } srp->server->xp_fd = FD_SETSIZE; srp->server->xp_port = 0; svc_raw_ops(srp->server); srp->server->xp_verf.oa_base = srp->verf_body; xdrmem_create(&srp->xdr_stream, srp->raw_buf, UDPMSGSIZE, XDR_DECODE); xprt_register(srp->server); mutex_unlock(&svcraw_lock); return (srp->server); } /*ARGSUSED*/ static enum xprt_stat -svc_raw_stat(xprt) -SVCXPRT *xprt; /* args needed to satisfy ANSI-C typechecking */ +svc_raw_stat(SVCXPRT *xprt) { return (XPRT_IDLE); } /*ARGSUSED*/ static bool_t -svc_raw_recv(xprt, msg) - SVCXPRT *xprt; - struct rpc_msg *msg; +svc_raw_recv(SVCXPRT *xprt, struct rpc_msg *msg) { struct svc_raw_private *srp; XDR *xdrs; mutex_lock(&svcraw_lock); srp = svc_raw_private; if (srp == NULL) { mutex_unlock(&svcraw_lock); return (FALSE); } mutex_unlock(&svcraw_lock); xdrs = &srp->xdr_stream; xdrs->x_op = XDR_DECODE; (void) XDR_SETPOS(xdrs, 0); if (!
xdr_callmsg(xdrs, msg)) { return (FALSE); } return (TRUE); } /*ARGSUSED*/ static bool_t -svc_raw_reply(xprt, msg) - SVCXPRT *xprt; - struct rpc_msg *msg; +svc_raw_reply(SVCXPRT *xprt, struct rpc_msg *msg) { struct svc_raw_private *srp; XDR *xdrs; bool_t stat; xdrproc_t xdr_proc; caddr_t xdr_where; mutex_lock(&svcraw_lock); srp = svc_raw_private; if (srp == NULL) { mutex_unlock(&svcraw_lock); return (FALSE); } mutex_unlock(&svcraw_lock); xdrs = &srp->xdr_stream; xdrs->x_op = XDR_ENCODE; (void) XDR_SETPOS(xdrs, 0); if (msg->rm_reply.rp_stat == MSG_ACCEPTED && msg->rm_reply.rp_acpt.ar_stat == SUCCESS) { xdr_proc = msg->acpted_rply.ar_results.proc; xdr_where = msg->acpted_rply.ar_results.where; msg->acpted_rply.ar_results.proc = (xdrproc_t) xdr_void; msg->acpted_rply.ar_results.where = NULL; stat = xdr_replymsg(xdrs, msg) && SVCAUTH_WRAP(&SVC_AUTH(xprt), xdrs, xdr_proc, xdr_where); } else { stat = xdr_replymsg(xdrs, msg); } if (!stat) { return (FALSE); } (void) XDR_GETPOS(xdrs); /* called just for overhead */ return (TRUE); } /*ARGSUSED*/ static bool_t -svc_raw_getargs(xprt, xdr_args, args_ptr) - SVCXPRT *xprt; - xdrproc_t xdr_args; - void *args_ptr; +svc_raw_getargs(SVCXPRT *xprt, xdrproc_t xdr_args, void *args_ptr) { struct svc_raw_private *srp; mutex_lock(&svcraw_lock); srp = svc_raw_private; if (srp == NULL) { mutex_unlock(&svcraw_lock); return (FALSE); } mutex_unlock(&svcraw_lock); return (SVCAUTH_UNWRAP(&SVC_AUTH(xprt), &srp->xdr_stream, xdr_args, args_ptr)); } /*ARGSUSED*/ static bool_t -svc_raw_freeargs(xprt, xdr_args, args_ptr) - SVCXPRT *xprt; - xdrproc_t xdr_args; - void *args_ptr; +svc_raw_freeargs(SVCXPRT *xprt, xdrproc_t xdr_args, void *args_ptr) { struct svc_raw_private *srp; XDR *xdrs; mutex_lock(&svcraw_lock); srp = svc_raw_private; if (srp == NULL) { mutex_unlock(&svcraw_lock); return (FALSE); } mutex_unlock(&svcraw_lock); xdrs = &srp->xdr_stream; xdrs->x_op = XDR_FREE; return (*xdr_args)(xdrs, args_ptr); } /*ARGSUSED*/ static void -svc_raw_destroy(xprt) -SVCXPRT *xprt; +svc_raw_destroy(SVCXPRT *xprt) { } /*ARGSUSED*/ static bool_t -svc_raw_control(xprt, rq, in) - SVCXPRT *xprt; - const u_int rq; - void *in; +svc_raw_control(SVCXPRT *xprt, const u_int rq, void *in) { return (FALSE); } static void -svc_raw_ops(xprt) - SVCXPRT *xprt; +svc_raw_ops(SVCXPRT *xprt) { static struct xp_ops ops; static struct xp_ops2 ops2; /* VARIABLES PROTECTED BY ops_lock: ops */ mutex_lock(&ops_lock); if (ops.xp_recv == NULL) { ops.xp_recv = svc_raw_recv; ops.xp_stat = svc_raw_stat; ops.xp_getargs = svc_raw_getargs; ops.xp_reply = svc_raw_reply; ops.xp_freeargs = svc_raw_freeargs; ops.xp_destroy = svc_raw_destroy; ops2.xp_control = svc_raw_control; } xprt->xp_ops = &ops; xprt->xp_ops2 = &ops2; mutex_unlock(&ops_lock); } Index: projects/clang370-import/lib/libc/rpc/svc_run.c =================================================================== --- projects/clang370-import/lib/libc/rpc/svc_run.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/svc_run.c (revision 288126) @@ -1,97 +1,97 @@ /* $NetBSD: svc_run.c,v 1.17 2000/07/06 03:10:35 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. 
* - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "from: @(#)svc_run.c 1.1 87/10/13 Copyr 1984 Sun Micro"; static char *sccsid = "from: @(#)svc_run.c 2.1 88/07/29 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * This is the rpc server side idle loop * Wait for input, call server program. */ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include #include #include "un-namespace.h" #include #include "rpc_com.h" #include "mt_misc.h" void -svc_run() +svc_run(void) { fd_set readfds, cleanfds; struct timeval timeout; timeout.tv_sec = 30; timeout.tv_usec = 0; for (;;) { rwlock_rdlock(&svc_fd_lock); readfds = svc_fdset; cleanfds = svc_fdset; rwlock_unlock(&svc_fd_lock); switch (_select(svc_maxfd+1, &readfds, NULL, NULL, &timeout)) { case -1: FD_ZERO(&readfds); if (errno == EINTR) { continue; } _warn("svc_run: - select failed"); return; case 0: __svc_clean_idle(&cleanfds, 30, FALSE); continue; default: svc_getreqset(&readfds); } } } /* * This function causes svc_run() to exit by telling it that it has no * more work to do. */ void -svc_exit() +svc_exit(void) { rwlock_wrlock(&svc_fd_lock); FD_ZERO(&svc_fdset); rwlock_unlock(&svc_fd_lock); } Index: projects/clang370-import/lib/libc/rpc/svc_simple.c =================================================================== --- projects/clang370-import/lib/libc/rpc/svc_simple.c (revision 288125) +++ projects/clang370-import/lib/libc/rpc/svc_simple.c (revision 288126) @@ -1,312 +1,312 @@ /* $NetBSD: svc_simple.c,v 1.20 2000/07/06 03:10:35 christos Exp $ */ /*- * Copyright (c) 2009, Sun Microsystems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * - Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * - Neither the name of Sun Microsystems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
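The svc_run() body above is just a select() loop over svc_fdset; a server that must multiplex RPC traffic with its own descriptors can roll the equivalent loop itself. A sketch that mirrors the libc version but omits the svc_fd_lock locking it takes:

#include <rpc/rpc.h>
#include <sys/select.h>
#include <errno.h>
#include <err.h>

/*
 * Snapshot svc_fdset (select() mutates its argument), wait up to the
 * same 30-second timeout, then hand ready descriptors to svc_getreqset().
 */
void
example_idle_loop(void)
{
	fd_set readfds;
	struct timeval tv;

	for (;;) {
		readfds = svc_fdset;
		tv.tv_sec = 30;
		tv.tv_usec = 0;
		switch (select(svc_maxfd + 1, &readfds, NULL, NULL, &tv)) {
		case -1:
			if (errno == EINTR)
				continue;
			err(1, "select");
		case 0:
			/* Timeout: a real server could reap idle xprts here. */
			continue;
		default:
			svc_getreqset(&readfds);
		}
	}
}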
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1986-1991 by Sun Microsystems Inc. */ /* #pragma ident "@(#)svc_simple.c 1.18 94/04/24 SMI" */ #include __FBSDID("$FreeBSD$"); /* * svc_simple.c * Simplified front end to rpc. */ /* * This interface creates a virtual listener for all the services * started thru rpc_reg(). It listens on the same endpoint for * all the services and then executes the corresponding service * for the given prognum and procnum. */ #include "namespace.h" #include "reentrant.h" #include #include #include #include #include #include #include #include "un-namespace.h" #include "rpc_com.h" #include "mt_misc.h" static void universal(struct svc_req *, SVCXPRT *); static struct proglst { char *(*p_progname)(char *); rpcprog_t p_prognum; rpcvers_t p_versnum; rpcproc_t p_procnum; SVCXPRT *p_transp; char *p_netid; char *p_xdrbuf; int p_recvsz; xdrproc_t p_inproc, p_outproc; struct proglst *p_nxt; } *proglst; static const char rpc_reg_err[] = "%s: %s"; static const char rpc_reg_msg[] = "rpc_reg: "; static const char __reg_err1[] = "can't find appropriate transport"; static const char __reg_err2[] = "can't get protocol info"; static const char __reg_err3[] = "unsupported transport size"; static const char __no_mem_str[] = "out of memory"; /* * For simplified, easy to use kind of rpc interfaces. * nettype indicates the type of transport on which the service will be * listening. Used for conservation of the system resource. Only one * handle is created for all the services (actually one of each netid) * and same xdrbuf is used for same netid. The size of the arguments * is also limited by the recvsize for that transport, even if it is * a COTS transport. This may be wrong, but for cases like these, they * should not use the simplified interfaces like this. 
+ * + * prognum - program number + * versnum - version number + * procnum - procedure number + * progname - Server routine + * inproc, outproc - in/out XDR procedures + * nettype - nettype */ - int -rpc_reg(prognum, versnum, procnum, progname, inproc, outproc, nettype) - rpcprog_t prognum; /* program number */ - rpcvers_t versnum; /* version number */ - rpcproc_t procnum; /* procedure number */ - char *(*progname)(char *); /* Server routine */ - xdrproc_t inproc, outproc; /* in/out XDR procedures */ - char *nettype; /* nettype */ +rpc_reg(rpcprog_t prognum, rpcvers_t versnum, rpcproc_t procnum, + char *(*progname)(char *), xdrproc_t inproc, xdrproc_t outproc, + char *nettype) { struct netconfig *nconf; int done = FALSE; void *handle; if (procnum == NULLPROC) { warnx("%s can't reassign procedure number %u", rpc_reg_msg, NULLPROC); return (-1); } if (nettype == NULL) nettype = "netpath"; /* The default behavior */ if ((handle = __rpc_setconf(nettype)) == NULL) { warnx(rpc_reg_err, rpc_reg_msg, __reg_err1); return (-1); } /* VARIABLES PROTECTED BY proglst_lock: proglst */ mutex_lock(&proglst_lock); while ((nconf = __rpc_getconf(handle)) != NULL) { struct proglst *pl; SVCXPRT *svcxprt; int madenow; u_int recvsz; char *xdrbuf; char *netid; madenow = FALSE; svcxprt = NULL; recvsz = 0; xdrbuf = netid = NULL; for (pl = proglst; pl; pl = pl->p_nxt) { if (strcmp(pl->p_netid, nconf->nc_netid) == 0) { svcxprt = pl->p_transp; xdrbuf = pl->p_xdrbuf; recvsz = pl->p_recvsz; netid = pl->p_netid; break; } } if (svcxprt == NULL) { struct __rpc_sockinfo si; svcxprt = svc_tli_create(RPC_ANYFD, nconf, NULL, 0, 0); if (svcxprt == NULL) continue; if (!__rpc_fd2sockinfo(svcxprt->xp_fd, &si)) { warnx(rpc_reg_err, rpc_reg_msg, __reg_err2); SVC_DESTROY(svcxprt); continue; } recvsz = __rpc_get_t_size(si.si_af, si.si_proto, 0); if (recvsz == 0) { warnx(rpc_reg_err, rpc_reg_msg, __reg_err3); SVC_DESTROY(svcxprt); continue; } if (((xdrbuf = malloc((unsigned)recvsz)) == NULL) || ((netid = strdup(nconf->nc_netid)) == NULL)) { warnx(rpc_reg_err, rpc_reg_msg, __no_mem_str); if (xdrbuf != NULL) free(xdrbuf); if (netid != NULL) free(netid); SVC_DESTROY(svcxprt); break; } madenow = TRUE; } /* * Check if this (program, version, netid) had already been * registered. 
The check may save a few RPC calls to rpcbind */ for (pl = proglst; pl; pl = pl->p_nxt) if ((pl->p_prognum == prognum) && (pl->p_versnum == versnum) && (strcmp(pl->p_netid, netid) == 0)) break; if (pl == NULL) { /* Not yet */ (void) rpcb_unset(prognum, versnum, nconf); } else { /* so that svc_reg does not call rpcb_set() */ nconf = NULL; } if (!svc_reg(svcxprt, prognum, versnum, universal, nconf)) { warnx("%s couldn't register prog %u vers %u for %s", rpc_reg_msg, (unsigned)prognum, (unsigned)versnum, netid); if (madenow) { SVC_DESTROY(svcxprt); free(xdrbuf); free(netid); } continue; } pl = malloc(sizeof (struct proglst)); if (pl == NULL) { warnx(rpc_reg_err, rpc_reg_msg, __no_mem_str); if (madenow) { SVC_DESTROY(svcxprt); free(xdrbuf); free(netid); } break; } pl->p_progname = progname; pl->p_prognum = prognum; pl->p_versnum = versnum; pl->p_procnum = procnum; pl->p_inproc = inproc; pl->p_outproc = outproc; pl->p_transp = svcxprt; pl->p_xdrbuf = xdrbuf; pl->p_recvsz = recvsz; pl->p_netid = netid; pl->p_nxt = proglst; proglst = pl; done = TRUE; } __rpc_endconf(handle); mutex_unlock(&proglst_lock); if (done == FALSE) { warnx("%s cant find suitable transport for %s", rpc_reg_msg, nettype); return (-1); } return (0); } /* * The universal handler for the services registered using registerrpc. * It handles both the connectionless and the connection oriented cases. */ static void -universal(rqstp, transp) - struct svc_req *rqstp; - SVCXPRT *transp; +universal(struct svc_req *rqstp, SVCXPRT *transp) { rpcprog_t prog; rpcvers_t vers; rpcproc_t proc; char *outdata; char *xdrbuf; struct proglst *pl; /* * enforce "procnum 0 is echo" convention */ if (rqstp->rq_proc == NULLPROC) { if (svc_sendreply(transp, (xdrproc_t) xdr_void, NULL) == FALSE) { warnx("svc_sendreply failed"); } return; } prog = rqstp->rq_prog; vers = rqstp->rq_vers; proc = rqstp->rq_proc; mutex_lock(&proglst_lock); for (pl = proglst; pl; pl = pl->p_nxt) if (pl->p_prognum == prog && pl->p_procnum == proc && pl->p_versnum == vers && (strcmp(pl->p_netid, transp->xp_netid) == 0)) { /* decode arguments into a CLEAN buffer */ xdrbuf = pl->p_xdrbuf; /* Zero the arguments: reqd ! */ (void) memset(xdrbuf, 0, sizeof (pl->p_recvsz)); /* * Assuming that sizeof (xdrbuf) would be enough * for the arguments; if not then the program * may bomb. BEWARE! 
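The handler convention that universal() relies on here (take the decoded argument buffer, return a pointer to storage that outlives the call, since the result is XDR-encoded after the handler returns) looks like this from the registering program's side; a sketch with a hypothetical program number, not taken from this change:

#include <rpc/rpc.h>
#include <err.h>

#define EXAMPLE_PROG ((rpcprog_t)0x20000043)	/* hypothetical */

/* Echo an int back; the static keeps the result valid after return. */
static char *
example_echo(char *argp)
{
	static int result;

	result = *(int *)(void *)argp;
	return ((char *)&result);
}

int
main(void)
{
	static char nettype[] = "netpath";

	if (rpc_reg(EXAMPLE_PROG, 1, 1, example_echo,
	    (xdrproc_t)xdr_int, (xdrproc_t)xdr_int, nettype) != 0)
		errx(1, "rpc_reg failed");
	svc_run();
	return (0);
}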
*/ if (!svc_getargs(transp, pl->p_inproc, xdrbuf)) { svcerr_decode(transp); mutex_unlock(&proglst_lock); return; } outdata = (*(pl->p_progname))(xdrbuf); if (outdata == NULL && pl->p_outproc != (xdrproc_t) xdr_void){ /* there was an error */ mutex_unlock(&proglst_lock); return; } if (!svc_sendreply(transp, pl->p_outproc, outdata)) { warnx( "rpc: rpc_reg trouble replying to prog %u vers %u", (unsigned)prog, (unsigned)vers); mutex_unlock(&proglst_lock); return; } /* free the decoded arguments */ (void)svc_freeargs(transp, pl->p_inproc, xdrbuf); mutex_unlock(&proglst_lock); return; } mutex_unlock(&proglst_lock); /* This should never happen */ warnx("rpc: rpc_reg: never registered prog %u vers %u", (unsigned)prog, (unsigned)vers); return; } Index: projects/clang370-import/lib/libc =================================================================== --- projects/clang370-import/lib/libc (revision 288125) +++ projects/clang370-import/lib/libc (revision 288126) Property changes on: projects/clang370-import/lib/libc ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/lib/libc:r288100-288125 Index: projects/clang370-import/sys/arm/arm/stdatomic.c =================================================================== --- projects/clang370-import/sys/arm/arm/stdatomic.c (revision 288125) +++ projects/clang370-import/sys/arm/arm/stdatomic.c (revision 288126) @@ -1,874 +1,879 @@ /*- * Copyright (c) 2013 Ed Schouten * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include /* * Executing statements with interrupts disabled. */ #if defined(_KERNEL) && !defined(SMP) #define WITHOUT_INTERRUPTS(s) do { \ register_t regs; \ \ regs = intr_disable(); \ do s while (0); \ intr_restore(regs); \ } while (0) #endif /* _KERNEL && !SMP */ /* * Memory barriers. * * It turns out __sync_synchronize() does not emit any code when used * with GCC 4.2. Implement our own version that does work reliably. * * Although __sync_lock_test_and_set() should only perform an acquire * barrier, make it do a full barrier like the other functions. This * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
*/ #if defined(_KERNEL) && !defined(SMP) static inline void do_sync(void) { __asm volatile ("" : : : "memory"); } #elif __ARM_ARCH >= 6 static inline void do_sync(void) { dmb(); } #endif #if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS) /* * New C11 __atomic_* API. */ /* ARMv6+ systems should be supported by the compiler. */ #if __ARM_ARCH <= 5 /* Clang doesn't allow us to reimplement builtins without this. */ #ifdef __clang__ #pragma redefine_extname __sync_synchronize_ext __sync_synchronize #define __sync_synchronize __sync_synchronize_ext #endif void __sync_synchronize(void) { } #ifdef _KERNEL #ifdef SMP #error "On SMP systems we should have proper atomic operations." #endif /* * On uniprocessor systems, we can perform the atomic operations by * disabling interrupts. */ #define EMIT_LOAD_N(N, uintN_t) \ uintN_t \ __atomic_load_##N(uintN_t *mem, int model __unused) \ { \ uintN_t ret; \ \ WITHOUT_INTERRUPTS({ \ ret = *mem; \ }); \ return (ret); \ } #define EMIT_STORE_N(N, uintN_t) \ void \ __atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \ { \ \ WITHOUT_INTERRUPTS({ \ *mem = val; \ }); \ } #define EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \ _Bool \ __atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected, \ uintN_t desired, int success __unused, int failure __unused) \ { \ _Bool ret; \ \ WITHOUT_INTERRUPTS({ \ if (*mem == *expected) { \ *mem = desired; \ ret = 1; \ } else { \ *expected = *mem; \ ret = 0; \ } \ }); \ return (ret); \ } #define EMIT_FETCH_OP_N(N, uintN_t, name, op) \ uintN_t \ __atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \ { \ uintN_t ret; \ \ WITHOUT_INTERRUPTS({ \ ret = *mem; \ *mem op val; \ }); \ return (ret); \ } #define EMIT_ALL_OPS_N(N, uintN_t) \ EMIT_LOAD_N(N, uintN_t) \ EMIT_STORE_N(N, uintN_t) \ EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \ EMIT_FETCH_OP_N(N, uintN_t, exchange, =) \ EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=) \ EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=) \ EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=) \ EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=) \ EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=) EMIT_ALL_OPS_N(1, uint8_t) EMIT_ALL_OPS_N(2, uint16_t) EMIT_ALL_OPS_N(4, uint32_t) EMIT_ALL_OPS_N(8, uint64_t) #undef EMIT_ALL_OPS_N #else /* !_KERNEL */ /* * For userspace on uniprocessor systems, we can implement the atomic * operations by using a Restartable Atomic Sequence. This makes the * kernel restart the code from the beginning when interrupted. */ #define EMIT_LOAD_N(N, uintN_t) \ uintN_t \ __atomic_load_##N(uintN_t *mem, int model __unused) \ { \ \ return (*mem); \ } #define EMIT_STORE_N(N, uintN_t) \ void \ __atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \ { \ \ *mem = val; \ } #define EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \ uintN_t \ __atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused) \ { \ uint32_t old, temp, ras_start; \ \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%5]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%5, #4]\n" \ \ "\t"ldr" %0, %4\n" /* Load old value. */ \ "\t"str" %3, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. 
*/ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%5]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%5, #4]\n" \ : "=&r" (old), "=m" (*mem), "=&r" (temp) \ : "r" (val), "m" (*mem), "r" (ras_start)); \ return (old); \ } #define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \ _Bool \ __atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected, \ uintN_t desired, int success __unused, int failure __unused) \ { \ uint32_t expected, old, temp, ras_start; \ \ expected = *pexpected; \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%6]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%6, #4]\n" \ \ "\t"ldr" %0, %5\n" /* Load old value. */ \ "\tcmp %0, %3\n" /* Compare to expected value. */\ "\t"streq" %4, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. */ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%6]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%6, #4]\n" \ : "=&r" (old), "=m" (*mem), "=&r" (temp) \ : "r" (expected), "r" (desired), "m" (*mem), \ "r" (ras_start)); \ if (old == expected) { \ return (1); \ } else { \ *pexpected = old; \ return (0); \ } \ } -#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op) \ +#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op, ret) \ uintN_t \ __atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \ { \ - uint32_t old, temp, ras_start; \ + uint32_t old, new, ras_start; \ \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%5]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%5, #4]\n" \ \ "\t"ldr" %0, %4\n" /* Load old value. */ \ "\t"op" %2, %0, %3\n" /* Calculate new value. */ \ "\t"str" %2, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. 
*/ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%5]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%5, #4]\n" \ - : "=&r" (old), "=m" (*mem), "=&r" (temp) \ + : "=&r" (old), "=m" (*mem), "=&r" (new) \ : "r" (val), "m" (*mem), "r" (ras_start)); \ - return (old); \ + return (ret); \ } #define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \ EMIT_LOAD_N(N, uintN_t) \ EMIT_STORE_N(N, uintN_t) \ EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \ EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \ -EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add") \ -EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and") \ -EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr") \ -EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub") \ -EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor") +EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add", old) \ +EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and", old) \ +EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr", old) \ +EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub", old) \ +EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor", old) \ +EMIT_FETCH_OP_N(N, uintN_t, ldr, str, add_fetch, "add", new) \ +EMIT_FETCH_OP_N(N, uintN_t, ldr, str, and_fetch, "and", new) \ +EMIT_FETCH_OP_N(N, uintN_t, ldr, str, or_fetch, "orr", new) \ +EMIT_FETCH_OP_N(N, uintN_t, ldr, str, sub_fetch, "sub", new) \ +EMIT_FETCH_OP_N(N, uintN_t, ldr, str, xor_fetch, "eor", new) EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq") EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq") EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq") #undef EMIT_ALL_OPS_N #endif /* _KERNEL */ #endif /* __ARM_ARCH */ #endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */ #if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS) #ifdef __clang__ #pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1 #pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2 #pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4 #pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1 #pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2 #pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4 #pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1 #pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2 #pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4 #pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1 #pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2 #pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4 #pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1 #pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2 #pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4 #pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1 #pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2 #pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4 #pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1 #pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2 #pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4 #endif /* * Old __sync_* API. */ #if __ARM_ARCH >= 6 /* Implementations for old GCC versions, lacking support for atomics. 
*/ typedef union { uint8_t v8[4]; uint32_t v32; } reg_t; /* * Given a memory address pointing to an 8-bit or 16-bit integer, return * the address of the 32-bit word containing it. */ static inline uint32_t * round_to_word(void *ptr) { return ((uint32_t *)((intptr_t)ptr & ~3)); } /* * Utility functions for loading and storing 8-bit and 16-bit integers * in 32-bit words at an offset corresponding with the location of the * atomic variable. */ static inline void put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val) { size_t offset; offset = (intptr_t)offset_ptr & 3; r->v8[offset] = val; } static inline uint8_t get_1(const reg_t *r, const uint8_t *offset_ptr) { size_t offset; offset = (intptr_t)offset_ptr & 3; return (r->v8[offset]); } static inline void put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val) { size_t offset; union { uint16_t in; uint8_t out[2]; } bytes; offset = (intptr_t)offset_ptr & 3; bytes.in = val; r->v8[offset] = bytes.out[0]; r->v8[offset + 1] = bytes.out[1]; } static inline uint16_t get_2(const reg_t *r, const uint16_t *offset_ptr) { size_t offset; union { uint8_t in[2]; uint16_t out; } bytes; offset = (intptr_t)offset_ptr & 3; bytes.in[0] = r->v8[offset]; bytes.in[1] = r->v8[offset + 1]; return (bytes.out); } /* * 8-bit and 16-bit routines. * * These operations are not natively supported by the CPU, so we use * some shifting and bitmasking on top of the 32-bit instructions. */ #define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \ uintN_t \ __sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \ { \ uint32_t *mem32; \ reg_t val32, negmask, old; \ uint32_t temp1, temp2; \ \ mem32 = round_to_word(mem); \ val32.v32 = 0x00000000; \ put_##N(&val32, mem, val); \ negmask.v32 = 0xffffffff; \ put_##N(&negmask, mem, 0); \ \ do_sync(); \ __asm volatile ( \ "1:" \ "\tldrex %0, %6\n" /* Load old value. */ \ "\tand %2, %5, %0\n" /* Remove the old value. */ \ "\torr %2, %2, %4\n" /* Put in the new value. */ \ "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ "\tcmp %3, #0\n" /* Did it succeed? */ \ "\tbne 1b\n" /* Spin if failed. */ \ : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ "=&r" (temp2) \ : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \ return (get_##N(&old, mem)); \ } EMIT_LOCK_TEST_AND_SET_N(1, uint8_t) EMIT_LOCK_TEST_AND_SET_N(2, uint16_t) #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ uintN_t \ __sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \ uintN_t desired) \ { \ uint32_t *mem32; \ reg_t expected32, desired32, posmask, old; \ uint32_t negmask, temp1, temp2; \ \ mem32 = round_to_word(mem); \ expected32.v32 = 0x00000000; \ put_##N(&expected32, mem, expected); \ desired32.v32 = 0x00000000; \ put_##N(&desired32, mem, desired); \ posmask.v32 = 0x00000000; \ put_##N(&posmask, mem, ~0); \ negmask = ~posmask.v32; \ \ do_sync(); \ __asm volatile ( \ "1:" \ "\tldrex %0, %8\n" /* Load old value. */ \ "\tand %2, %6, %0\n" /* Isolate the old value. */ \ "\tcmp %2, %4\n" /* Compare to expected value. */\ "\tbne 2f\n" /* Values are unequal. */ \ "\tand %2, %7, %0\n" /* Remove the old value. */ \ "\torr %2, %5\n" /* Put in the new value. */ \ "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ "\tcmp %3, #0\n" /* Did it succeed? */ \ "\tbne 1b\n" /* Spin if failed. 
*/ \ "2:" \ : "=&r" (old), "=m" (*mem32), "=&r" (temp1), \ "=&r" (temp2) \ : "r" (expected32.v32), "r" (desired32.v32), \ "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \ return (get_##N(&old, mem)); \ } EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t) EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t) #define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \ uintN_t \ __sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ { \ uint32_t *mem32; \ reg_t val32, posmask, old; \ uint32_t negmask, temp1, temp2; \ \ mem32 = round_to_word(mem); \ val32.v32 = 0x00000000; \ put_##N(&val32, mem, val); \ posmask.v32 = 0x00000000; \ put_##N(&posmask, mem, ~0); \ negmask = ~posmask.v32; \ \ do_sync(); \ __asm volatile ( \ "1:" \ "\tldrex %0, %7\n" /* Load old value. */ \ "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ "\tand %2, %5\n" /* Isolate the new value. */ \ "\tand %3, %6, %0\n" /* Remove the old value. */ \ "\torr %2, %2, %3\n" /* Put in the new value. */ \ "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ "\tcmp %3, #0\n" /* Did it succeed? */ \ "\tbne 1b\n" /* Spin if failed. */ \ : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ "=&r" (temp2) \ : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \ "m" (*mem32)); \ return (get_##N(&old, mem)); \ } EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add") EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub") EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add") EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub") #define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \ uintN_t \ __sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ { \ uint32_t *mem32; \ reg_t val32, old; \ uint32_t temp1, temp2; \ \ mem32 = round_to_word(mem); \ val32.v32 = idempotence ? 0xffffffff : 0x00000000; \ put_##N(&val32, mem, val); \ \ do_sync(); \ __asm volatile ( \ "1:" \ "\tldrex %0, %5\n" /* Load old value. */ \ "\t"op" %2, %4, %0\n" /* Calculate new value. */ \ "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ "\tcmp %3, #0\n" /* Did it succeed? */ \ "\tbne 1b\n" /* Spin if failed. */ \ : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ "=&r" (temp2) \ : "r" (val32.v32), "m" (*mem32)); \ return (get_##N(&old, mem)); \ } EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1) EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0) EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0) EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1) EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0) EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0) /* * 32-bit routines. */ uint32_t __sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val) { uint32_t old, temp; do_sync(); __asm volatile ( "1:" "\tldrex %0, %4\n" /* Load old value. */ "\tstrex %2, %3, %1\n" /* Attempt to store. */ "\tcmp %2, #0\n" /* Did it succeed? */ "\tbne 1b\n" /* Spin if failed. */ : "=&r" (old), "=m" (*mem), "=&r" (temp) : "r" (val), "m" (*mem)); return (old); } uint32_t __sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected, uint32_t desired) { uint32_t old, temp; do_sync(); __asm volatile ( "1:" "\tldrex %0, %5\n" /* Load old value. */ "\tcmp %0, %3\n" /* Compare to expected value. */ "\tbne 2f\n" /* Values are unequal. */ "\tstrex %2, %4, %1\n" /* Attempt to store. */ "\tcmp %2, #0\n" /* Did it succeed? */ "\tbne 1b\n" /* Spin if failed. 
*/ "2:" : "=&r" (old), "=m" (*mem), "=&r" (temp) : "r" (expected), "r" (desired), "m" (*mem)); return (old); } #define EMIT_FETCH_AND_OP_4(name, op) \ uint32_t \ __sync_##name##_4##_c(uint32_t *mem, uint32_t val) \ { \ uint32_t old, temp1, temp2; \ \ do_sync(); \ __asm volatile ( \ "1:" \ "\tldrex %0, %5\n" /* Load old value. */ \ "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ "\tcmp %3, #0\n" /* Did it succeed? */ \ "\tbne 1b\n" /* Spin if failed. */ \ : "=&r" (old), "=m" (*mem), "=&r" (temp1), \ "=&r" (temp2) \ : "r" (val), "m" (*mem)); \ return (old); \ } EMIT_FETCH_AND_OP_4(fetch_and_add, "add") EMIT_FETCH_AND_OP_4(fetch_and_and, "and") EMIT_FETCH_AND_OP_4(fetch_and_or, "orr") EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub") EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor") #ifndef __clang__ __strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1); __strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2); __strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4); __strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1); __strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2); __strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4); __strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1); __strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2); __strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4); __strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1); __strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2); __strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4); __strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1); __strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2); __strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4); __strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1); __strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2); __strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4); __strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1); __strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2); __strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4); #endif #else /* __ARM_ARCH < 6 */ #ifdef _KERNEL #ifdef SMP #error "On SMP systems we should have proper atomic operations." #endif /* * On uniprocessor systems, we can perform the atomic operations by * disabling interrupts. 
*/ #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ uintN_t \ __sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \ uintN_t desired) \ { \ uintN_t ret; \ \ WITHOUT_INTERRUPTS({ \ ret = *mem; \ if (*mem == expected) \ *mem = desired; \ }); \ return (ret); \ } #define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op) \ uintN_t \ __sync_##name##_##N(uintN_t *mem, uintN_t val) \ { \ uintN_t ret; \ \ WITHOUT_INTERRUPTS({ \ ret = *mem; \ *mem op val; \ }); \ return (ret); \ } #define EMIT_ALL_OPS_N(N, uintN_t) \ EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =) \ EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=) \ EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=) \ EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=) \ EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=) \ EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=) EMIT_ALL_OPS_N(1, uint8_t) EMIT_ALL_OPS_N(2, uint16_t) EMIT_ALL_OPS_N(4, uint32_t) EMIT_ALL_OPS_N(8, uint64_t) #undef EMIT_ALL_OPS_N #else /* !_KERNEL */ /* * For userspace on uniprocessor systems, we can implement the atomic * operations by using a Restartable Atomic Sequence. This makes the * kernel restart the code from the beginning when interrupted. */ #define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \ uintN_t \ __sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \ { \ uint32_t old, temp, ras_start; \ \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%5]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%5, #4]\n" \ \ "\t"ldr" %0, %4\n" /* Load old value. */ \ "\t"str" %3, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. */ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%5]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%5, #4]\n" \ : "=&r" (old), "=m" (*mem), "=&r" (temp) \ : "r" (val), "m" (*mem), "r" (ras_start)); \ return (old); \ } #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \ uintN_t \ __sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \ uintN_t desired) \ { \ uint32_t old, temp, ras_start; \ \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%6]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%6, #4]\n" \ \ "\t"ldr" %0, %5\n" /* Load old value. */ \ "\tcmp %0, %3\n" /* Compare to expected value. */\ "\t"streq" %4, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. */ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%6]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%6, #4]\n" \ : "=&r" (old), "=m" (*mem), "=&r" (temp) \ : "r" (expected), "r" (desired), "m" (*mem), \ "r" (ras_start)); \ return (old); \ } #define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op) \ uintN_t \ __sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ { \ uint32_t old, temp, ras_start; \ \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%5]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%5, #4]\n" \ \ "\t"ldr" %0, %4\n" /* Load old value. */ \ "\t"op" %2, %0, %3\n" /* Calculate new value. */ \ "\t"str" %2, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. 
*/ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%5]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%5, #4]\n" \ : "=&r" (old), "=m" (*mem), "=&r" (temp) \ : "r" (val), "m" (*mem), "r" (ras_start)); \ return (old); \ } #define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \ EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \ EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \ EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add") \ EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and") \ EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr") \ EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub") \ EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor") #ifdef __clang__ EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq") EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq") #else EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb") EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh") #endif EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq") #ifndef __clang__ __strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1); __strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2); __strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4); __strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1); __strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2); __strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4); __strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1); __strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2); __strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4); __strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1); __strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2); __strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4); __strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1); __strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2); __strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4); __strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1); __strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2); __strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4); __strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1); __strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2); __strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4); #endif /* __ARM_ARCH */ #endif /* _KERNEL */ #endif #endif /* __SYNC_ATOMICS */ Index: projects/clang370-import/sys/arm64/arm64/exception.S =================================================================== --- projects/clang370-import/sys/arm64/arm64/exception.S (revision 288125) +++ projects/clang370-import/sys/arm64/arm64/exception.S (revision 288126) @@ -1,204 +1,207 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "assym.s" .text .macro save_registers el .if \el == 1 mov x18, sp sub sp, sp, #128 .endif stp x28, x29, [sp, #-16]! stp x26, x27, [sp, #-16]! stp x24, x25, [sp, #-16]! stp x22, x23, [sp, #-16]! stp x20, x21, [sp, #-16]! stp x18, x19, [sp, #-16]! stp x16, x17, [sp, #-16]! stp x14, x15, [sp, #-16]! stp x12, x13, [sp, #-16]! stp x10, x11, [sp, #-16]! stp x8, x9, [sp, #-16]! stp x6, x7, [sp, #-16]! stp x4, x5, [sp, #-16]! stp x2, x3, [sp, #-16]! stp x0, x1, [sp, #-16]! mrs x10, elr_el1 mrs x11, spsr_el1 .if \el == 0 mrs x18, sp_el0 .endif stp x10, x11, [sp, #-16]! stp x18, lr, [sp, #-16]! mrs x18, tpidr_el1 .endm .macro restore_registers el - msr daifset, #2 /* Disable interrupts, x18 may change - * in the interrupt exception handler */ +.if \el == 1 + msr daifset, #2 + /* + * Disable interrupts, x18 may change in the interrupt exception + * handler. For EL0 exceptions, do_ast already did this. + */ +.endif ldp x18, lr, [sp], #16 ldp x10, x11, [sp], #16 .if \el == 0 msr sp_el0, x18 .endif msr spsr_el1, x11 msr elr_el1, x10 ldp x0, x1, [sp], #16 ldp x2, x3, [sp], #16 ldp x4, x5, [sp], #16 ldp x6, x7, [sp], #16 ldp x8, x9, [sp], #16 ldp x10, x11, [sp], #16 ldp x12, x13, [sp], #16 ldp x14, x15, [sp], #16 ldp x16, x17, [sp], #16 .if \el == 0 ldp x18, x19, [sp], #16 .else ldp xzr, x19, [sp], #16 .endif ldp x20, x21, [sp], #16 ldp x22, x23, [sp], #16 ldp x24, x25, [sp], #16 ldp x26, x27, [sp], #16 ldp x28, x29, [sp], #16 .if \el == 1 mov sp, x18 mrs x18, tpidr_el1 .endif .endm .macro do_ast /* Disable interrupts */ mrs x19, daif +1: msr daifset, #2 /* Read the current thread flags */ -1: ldr x1, [x18, #PC_CURTHREAD] /* Load curthread */ + ldr x1, [x18, #PC_CURTHREAD] /* Load curthread */ ldr x2, [x1, #TD_FLAGS] /* Check if we have either bits set */ mov x3, #((TDF_ASTPENDING|TDF_NEEDRESCHED) >> 8) lsl x3, x3, #8 and x2, x2, x3 cmp x2, #0 b.eq 2f /* Restore interrupts */ msr daif, x19 /* handle the ast */ mov x0, sp bl _C_LABEL(ast) - /* Disable interrupts */ - mrs x19, daif - msr daifset, #2 - + /* Re-check for new ast scheduled */ + b 1b 2: - /* Restore interrupts */ - msr daif, x19 .endm handle_el1h_sync: save_registers 1 mov x0, sp bl do_el1h_sync restore_registers 1 eret handle_el1h_irq: save_registers 1 mov x0, sp bl arm_cpu_intr restore_registers 1 eret handle_el1h_error: brk 0xf13 handle_el0_sync: save_registers 0 mov x0, sp bl do_el0_sync do_ast restore_registers 0 eret handle_el0_irq: save_registers 0 mov x0, sp bl arm_cpu_intr + do_ast restore_registers 0 eret handle_el0_error: save_registers 0 mov x0, sp bl do_el0_error brk 0xf23 1: b 1b .macro vempty .align 7 brk 0xfff 1: b 1b .endm .macro vector name .align 7 b handle_\name .endm .align 11 .globl exception_vectors exception_vectors: vempty /* 
Synchronous EL1t */ vempty /* IRQ EL1t */ vempty /* FIQ EL1t */ vempty /* Error EL1t */ vector el1h_sync /* Synchronous EL1h */ vector el1h_irq /* IRQ EL1h */ vempty /* FIQ EL1h */ vector el1h_error /* Error EL1h */ vector el0_sync /* Synchronous 64-bit EL0 */ vector el0_irq /* IRQ 64-bit EL0 */ vempty /* FIQ 64-bit EL0 */ vector el0_error /* Error 64-bit EL0 */ vempty /* Synchronous 32-bit EL0 */ vempty /* IRQ 32-bit EL0 */ vempty /* FIQ 32-bit EL0 */ vempty /* Error 32-bit EL0 */ Index: projects/clang370-import/sys/arm64/cavium/thunder_pcie_pem.c =================================================================== --- projects/clang370-import/sys/arm64/cavium/thunder_pcie_pem.c (revision 288125) +++ projects/clang370-import/sys/arm64/cavium/thunder_pcie_pem.c (revision 288126) @@ -1,637 +1,645 @@ /*- * Copyright (c) 2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Semihalf under * the sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
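Returning to the do_ast change in exception.S above: the revised loop masks interrupts before testing the thread flags and branches back to re-test after ast() returns, so an AST posted while ast() ran with interrupts enabled is not lost. A C rendering of that control flow, with stand-ins for the kernel's td_flags, ast() and interrupt primitives (illustrative only, not kernel code):

#include <stdio.h>

#define TDF_ASTPENDING	0x1
#define TDF_NEEDRESCHED	0x2

static volatile unsigned td_flags = TDF_ASTPENDING;	/* pretend one is pending */

static void intr_disable(void) { }	/* models "msr daifset, #2" */
static void intr_enable(void) { }	/* models "msr daif, x19" */

static void
ast(void)
{
	printf("ast handled\n");
	td_flags = 0;		/* a real ast() may also post new flags */
}

static void
do_ast_equivalent(void)
{
	for (;;) {
		intr_disable();
		if ((td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) == 0)
			return;	/* exit path runs with interrupts masked */
		intr_enable();
		ast();		/* then loop: the "b 1b" re-check */
	}
}

int
main(void)
{
	do_ast_equivalent();
	return (0);
}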
*/ /* PCIe external MAC root complex driver (PEM) for Cavium Thunder SOC */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "thunder_pcie_common.h" #include "pcib_if.h" #define THUNDER_PEM_DEVICE_ID 0xa020 #define THUNDER_PEM_VENDOR_ID 0x177d #define THUNDER_PEM_DESC "ThunderX PEM" /* ThunderX specific defines */ #define THUNDER_PEMn_REG_BASE(unit) (0x87e0c0000000UL | ((unit) << 24)) #define PCIERC_CFG002 0x08 #define PCIERC_CFG006 0x18 #define PCIERC_CFG032 0x80 #define PCIERC_CFG006_SEC_BUS(reg) (((reg) >> 8) & 0xFF) #define PEM_CFG_RD_REG_ALIGN(reg) ((reg) & ~0x3) #define PEM_CFG_RD_REG_DATA(val) (((val) >> 32) & 0xFFFFFFFF) #define PEM_CFG_RD 0x30 #define PEM_CFG_LINK_MASK 0x3 #define PEM_CFG_LINK_RDY 0x3 #define PEM_CFG_SLIX_TO_REG(slix) ((slix) << 4) #define SBNUM_OFFSET 0x8 #define SBNUM_MASK 0xFF #define PEM_ON_REG 0x420 #define PEM_CTL_STATUS 0x0 #define PEM_LINK_ENABLE (1 << 4) #define PEM_LINK_DLLA (1 << 29) #define PEM_LINK_LT (1 << 27) #define PEM_BUS_SHIFT (24) #define PEM_SLOT_SHIFT (19) #define PEM_FUNC_SHIFT (16) #define SLIX_S2M_REGX_ACC 0x874001000000UL #define SLIX_S2M_REGX_ACC_SIZE 0x1000 #define SLIX_S2M_REGX_ACC_SPACING 0x001000000000UL #define SLI_BASE 0x880000000000UL #define SLI_WINDOW_SPACING 0x004000000000UL #define SLI_WINDOW_SIZE 0x0000FF000000UL #define SLI_PCI_OFFSET 0x001000000000UL #define SLI_NODE_SHIFT (44) #define SLI_NODE_MASK (3) #define SLI_GROUP_SHIFT (40) #define SLI_ID_SHIFT (24) #define SLI_ID_MASK (7) #define SLI_PEMS_PER_GROUP (3) #define SLI_GROUPS_PER_NODE (2) #define SLI_PEMS_PER_NODE (SLI_PEMS_PER_GROUP * SLI_GROUPS_PER_NODE) #define SLI_ACC_REG_CNT (256) /* * Each PEM device creates its own bus with * own address translation, so we can adjust bus addresses * as we want. 
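[Given the window layout above, translating a PCI bus address into a host physical address is a constant offset into the per-PEM SLI window; this is exactly what the attach path below records in its pcie_range entries. A sketch of the translation, assuming that layout (the helper name is illustrative):]

	/*
	 * Sketch: both windows above are mapped 1:1 at
	 * sli_window_base + SLI_PCI_OFFSET, so translation is one add.
	 */
	static uint64_t
	pem_pci_to_phys_sketch(uint64_t sli_window_base, uint64_t pci_addr)
	{
		return (sli_window_base + SLI_PCI_OFFSET + pci_addr);
	}

[For example, the first byte of the memory window (PCI address 0x00100000) lands at sli_window_base + SLI_PCI_OFFSET + 0x00100000.]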
To support 32-bit cards let's assume * PCI window assignment looks as following: * * 0x00000000 - 0x000FFFFF IO * 0x00100000 - 0xFFFFFFFF Memory */ #define PCI_IO_BASE 0x00000000UL #define PCI_IO_SIZE 0x00100000UL #define PCI_MEMORY_BASE PCI_IO_SIZE #define PCI_MEMORY_SIZE 0xFFF00000UL struct thunder_pem_softc { device_t dev; struct resource *reg; bus_space_tag_t reg_bst; bus_space_handle_t reg_bsh; struct pcie_range ranges[MAX_RANGES_TUPLES]; struct rman mem_rman; struct rman io_rman; bus_space_handle_t pem_sli_base; uint32_t node; uint32_t id; uint32_t sli; uint32_t sli_group; uint64_t sli_window_base; }; static struct resource * thunder_pem_alloc_resource(device_t, device_t, int, int *, u_long, u_long, u_long, u_int); static int thunder_pem_attach(device_t); static int thunder_pem_detach(device_t); static uint64_t thunder_pem_config_reg_read(struct thunder_pem_softc *, int); static int thunder_pem_link_init(struct thunder_pem_softc *); static int thunder_pem_maxslots(device_t); static int thunder_pem_probe(device_t); static uint32_t thunder_pem_read_config(device_t, u_int, u_int, u_int, u_int, int); static int thunder_pem_read_ivar(device_t, device_t, int, uintptr_t *); static void thunder_pem_release_all(device_t); static int thunder_pem_release_resource(device_t, device_t, int, int, struct resource *); static void thunder_pem_slix_s2m_regx_acc_modify(struct thunder_pem_softc *, int, int); static void thunder_pem_write_config(device_t, u_int, u_int, u_int, u_int, uint32_t, int); static int thunder_pem_write_ivar(device_t, device_t, int, uintptr_t); /* Global handlers for SLI interface */ static bus_space_handle_t sli0_s2m_regx_base = 0; static bus_space_handle_t sli1_s2m_regx_base = 0; static device_method_t thunder_pem_methods[] = { /* Device interface */ DEVMETHOD(device_probe, thunder_pem_probe), DEVMETHOD(device_attach, thunder_pem_attach), DEVMETHOD(device_detach, thunder_pem_detach), DEVMETHOD(pcib_maxslots, thunder_pem_maxslots), DEVMETHOD(pcib_read_config, thunder_pem_read_config), DEVMETHOD(pcib_write_config, thunder_pem_write_config), DEVMETHOD(bus_read_ivar, thunder_pem_read_ivar), DEVMETHOD(bus_write_ivar, thunder_pem_write_ivar), DEVMETHOD(bus_alloc_resource, thunder_pem_alloc_resource), DEVMETHOD(bus_release_resource, thunder_pem_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), DEVMETHOD(pcib_map_msi, thunder_common_map_msi), DEVMETHOD(pcib_alloc_msix, thunder_common_alloc_msix), DEVMETHOD(pcib_release_msix, thunder_common_release_msix), DEVMETHOD(pcib_alloc_msi, thunder_common_alloc_msi), DEVMETHOD(pcib_release_msi, thunder_common_release_msi), DEVMETHOD_END }; static driver_t thunder_pem_driver = { "pcib", thunder_pem_methods, sizeof(struct thunder_pem_softc), }; static int thunder_pem_maxslots(device_t dev) { +#if 0 /* max slots per bus acc. 
to standard */ return (PCI_SLOTMAX); +#else + /* + * ARM64TODO Workaround - otherwise an em(4) interface appears to be + * present on every PCI function on the bus to which it is connected + */ + return (0); +#endif } static int thunder_pem_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) { struct thunder_pem_softc *sc; int secondary_bus = 0; sc = device_get_softc(dev); if (index == PCIB_IVAR_BUS) { secondary_bus = thunder_pem_config_reg_read(sc, PCIERC_CFG006); *result = PCIERC_CFG006_SEC_BUS(secondary_bus); return (0); } if (index == PCIB_IVAR_DOMAIN) { *result = sc->id; return (0); } return (ENOENT); } static int thunder_pem_write_ivar(device_t dev, device_t child, int index, uintptr_t value) { return (ENOENT); } static int thunder_pem_identify(device_t dev) { struct thunder_pem_softc *sc; u_long start; sc = device_get_softc(dev); start = rman_get_start(sc->reg); /* Calculate PEM designations from its address */ sc->node = (start >> SLI_NODE_SHIFT) & SLI_NODE_MASK; sc->id = ((start >> SLI_ID_SHIFT) & SLI_ID_MASK) + (SLI_PEMS_PER_NODE * sc->node); sc->sli = sc->id % SLI_PEMS_PER_GROUP; sc->sli_group = (sc->id / SLI_PEMS_PER_GROUP) % SLI_GROUPS_PER_NODE; sc->sli_window_base = SLI_BASE | (((uint64_t)sc->node) << SLI_NODE_SHIFT) | ((uint64_t)sc->sli_group << SLI_GROUP_SHIFT); sc->sli_window_base += SLI_WINDOW_SPACING * sc->sli; return (0); } static void thunder_pem_slix_s2m_regx_acc_modify(struct thunder_pem_softc *sc, int sli_group, int slix) { uint64_t regval; bus_space_handle_t handle = 0; KASSERT(slix >= 0 && slix <= SLI_ACC_REG_CNT, ("Invalid SLI index")); if (sli_group == 0) handle = sli0_s2m_regx_base; else if (sli_group == 1) handle = sli1_s2m_regx_base; else device_printf(sc->dev, "SLI group is not correct\n"); if (handle) { /* Clear lower 32-bits of the SLIx register */ regval = bus_space_read_8(sc->reg_bst, handle, PEM_CFG_SLIX_TO_REG(slix)); regval &= ~(0xFFFFFFFFUL); bus_space_write_8(sc->reg_bst, handle, PEM_CFG_SLIX_TO_REG(slix), regval); } } static int thunder_pem_link_init(struct thunder_pem_softc *sc) { uint64_t regval; /* check whether PEM is safe to access. 
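[The configuration accessors further below form an ECAM-like offset from the PEM_*_SHIFT defines; a short sketch of that encoding, using the defines from this file (the function name is illustrative):]

	/*
	 * Sketch of the config-space offset encoding used by
	 * thunder_pem_read_config()/thunder_pem_write_config():
	 * bus in bits 31:24, slot in 23:19, function in 18:16,
	 * register offset in the low bits.
	 */
	static uint64_t
	pem_cfg_offset_sketch(u_int bus, u_int slot, u_int func, u_int reg)
	{
		return (((uint64_t)bus << PEM_BUS_SHIFT) |
		    ((uint64_t)slot << PEM_SLOT_SHIFT) |
		    ((uint64_t)func << PEM_FUNC_SHIFT) | reg);
	}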
*/ regval = bus_space_read_8(sc->reg_bst, sc->reg_bsh, PEM_ON_REG); if ((regval & PEM_CFG_LINK_MASK) != PEM_CFG_LINK_RDY) { device_printf(sc->dev, "PEM%d is not ON\n", sc->id); return (ENXIO); } regval = bus_space_read_8(sc->reg_bst, sc->reg_bsh, PEM_CTL_STATUS); regval |= PEM_LINK_ENABLE; bus_space_write_8(sc->reg_bst, sc->reg_bsh, PEM_CTL_STATUS, regval); /* Wait 1ms as per Cavium specification */ DELAY(1000); regval = thunder_pem_config_reg_read(sc, PCIERC_CFG032); if (((regval & PEM_LINK_DLLA) == 0) || ((regval & PEM_LINK_LT) != 0)) { device_printf(sc->dev, "PCIe RC: Port %d Link Timeout\n", sc->id); return (ENXIO); } return (0); } static int thunder_pem_init(struct thunder_pem_softc *sc) { int i, retval = 0; retval = thunder_pem_link_init(sc); if (retval) { device_printf(sc->dev, "%s failed\n", __func__); return retval; } retval = bus_space_map(sc->reg_bst, sc->sli_window_base, SLI_WINDOW_SIZE, 0, &sc->pem_sli_base); if (retval) { device_printf(sc->dev, "Unable to map RC%d pem_addr base address", sc->id); return (ENOMEM); } /* To support 32-bit PCIe devices, set S2M_REGx_ACC[BA]=0x0 */ for (i = 0; i < SLI_ACC_REG_CNT; i++) { thunder_pem_slix_s2m_regx_acc_modify(sc, sc->sli_group, i); } return (retval); } static uint64_t thunder_pem_config_reg_read(struct thunder_pem_softc *sc, int reg) { uint64_t data; /* Write to ADDR register */ bus_space_write_8(sc->reg_bst, sc->reg_bsh, PEM_CFG_RD, PEM_CFG_RD_REG_ALIGN(reg)); bus_space_barrier(sc->reg_bst, sc->reg_bsh, PEM_CFG_RD, 8, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); /* Read from DATA register */ data = PEM_CFG_RD_REG_DATA(bus_space_read_8(sc->reg_bst, sc->reg_bsh, PEM_CFG_RD)); return (data); } static uint32_t thunder_pem_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int bytes) { uint64_t offset; uint32_t data; struct thunder_pem_softc *sc; bus_space_tag_t t; bus_space_handle_t h; if ((bus > PCI_BUSMAX) || (slot > PCI_SLOTMAX) || (func > PCI_FUNCMAX) || (reg > PCIE_REGMAX)) return (~0U); sc = device_get_softc(dev); /* Calculate offset */ offset = (bus << PEM_BUS_SHIFT) | (slot << PEM_SLOT_SHIFT) | (func << PEM_FUNC_SHIFT) | reg; t = sc->reg_bst; h = sc->pem_sli_base; switch (bytes) { case 1: data = bus_space_read_1(t, h, offset); break; case 2: data = le16toh(bus_space_read_2(t, h, offset)); break; case 4: data = le32toh(bus_space_read_4(t, h, offset)); break; default: return (~0U); } return (data); } static void thunder_pem_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, uint32_t val, int bytes) { uint64_t offset; struct thunder_pem_softc *sc; bus_space_tag_t t; bus_space_handle_t h; if ((bus > PCI_BUSMAX) || (slot > PCI_SLOTMAX) || (func > PCI_FUNCMAX) || (reg > PCIE_REGMAX)) return; sc = device_get_softc(dev); /* Calculate offset */ offset = (bus << PEM_BUS_SHIFT) | (slot << PEM_SLOT_SHIFT) | (func << PEM_FUNC_SHIFT) | reg; t = sc->reg_bst; h = sc->pem_sli_base; switch (bytes) { case 1: bus_space_write_1(t, h, offset, val); break; case 2: bus_space_write_2(t, h, offset, htole16(val)); break; case 4: bus_space_write_4(t, h, offset, htole32(val)); break; default: return; } } static struct resource * thunder_pem_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct thunder_pem_softc *sc = device_get_softc(dev); struct rman *rm = NULL; struct resource *res; device_t parent_dev; switch (type) { case SYS_RES_IOPORT: rm = &sc->io_rman; break; case SYS_RES_MEMORY: rm = &sc->mem_rman; break; default: /* Find 
parent device. On ThunderX we know an exact path. */ parent_dev = device_get_parent(device_get_parent(dev)); return (BUS_ALLOC_RESOURCE(parent_dev, dev, type, rid, start, end, count, flags)); }; if ((start == 0UL) && (end == ~0UL)) { device_printf(dev, "Cannot allocate resource with unspecified range\n"); goto fail; } /* Translate PCI address to host PHYS */ if (range_addr_is_pci(sc->ranges, start, count) == 0) goto fail; start = range_addr_pci_to_phys(sc->ranges, start); end = start + count - 1; if (bootverbose) { device_printf(dev, "rman_reserve_resource: start=%#lx, end=%#lx, count=%#lx\n", start, end, count); } res = rman_reserve_resource(rm, start, end, count, flags, child); if (res == NULL) goto fail; rman_set_rid(res, *rid); if (flags & RF_ACTIVE) if (bus_activate_resource(child, type, *rid, res)) { rman_release_resource(res); goto fail; } return (res); fail: if (bootverbose) { device_printf(dev, "%s FAIL: type=%d, rid=%d, " "start=%016lx, end=%016lx, count=%016lx, flags=%x\n", __func__, type, *rid, start, end, count, flags); } return (NULL); } static int thunder_pem_release_resource(device_t dev, device_t child, int type, int rid, struct resource *res) { device_t parent_dev; /* Find parent device. On ThunderX we know an exact path. */ parent_dev = device_get_parent(device_get_parent(dev)); if ((type != SYS_RES_MEMORY) && (type != SYS_RES_IOPORT)) return (BUS_RELEASE_RESOURCE(parent_dev, child, type, rid, res)); return (rman_release_resource(res)); } static int thunder_pem_probe(device_t dev) { uint16_t pci_vendor_id; uint16_t pci_device_id; pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); if ((pci_vendor_id == THUNDER_PEM_VENDOR_ID) && (pci_device_id == THUNDER_PEM_DEVICE_ID)) { device_set_desc_copy(dev, THUNDER_PEM_DESC); return (0); } return (ENXIO); } static int thunder_pem_attach(device_t dev) { struct thunder_pem_softc *sc; int error; int rid; sc = device_get_softc(dev); sc->dev = dev; /* Allocate memory for BAR(0) */ rid = PCIR_BAR(0); sc->reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->reg == NULL) { device_printf(dev, "Failed to allocate resource\n"); return (ENXIO); } sc->reg_bst = rman_get_bustag(sc->reg); sc->reg_bsh = rman_get_bushandle(sc->reg); /* Map SLI, do it only once */ if (!sli0_s2m_regx_base) { bus_space_map(sc->reg_bst, SLIX_S2M_REGX_ACC, SLIX_S2M_REGX_ACC_SIZE, 0, &sli0_s2m_regx_base); } if (!sli1_s2m_regx_base) { bus_space_map(sc->reg_bst, SLIX_S2M_REGX_ACC + SLIX_S2M_REGX_ACC_SPACING, SLIX_S2M_REGX_ACC_SIZE, 0, &sli1_s2m_regx_base); } if ((sli0_s2m_regx_base == 0) || (sli1_s2m_regx_base == 0)) { device_printf(dev, "bus_space_map failed to map slix_s2m_regx_base\n"); goto fail; } /* Identify PEM */ if (thunder_pem_identify(dev) != 0) goto fail; /* Initialize rman and allocate regions */ sc->mem_rman.rm_type = RMAN_ARRAY; sc->mem_rman.rm_descr = "PEM PCIe Memory"; error = rman_init(&sc->mem_rman); if (error != 0) { device_printf(dev, "memory rman_init() failed. error = %d\n", error); goto fail; } sc->io_rman.rm_type = RMAN_ARRAY; sc->io_rman.rm_descr = "PEM PCIe IO"; error = rman_init(&sc->io_rman); if (error != 0) { device_printf(dev, "IO rman_init() failed. 
error = %d\n", error); goto fail_mem; } /* Fill memory window */ sc->ranges[0].pci_base = PCI_MEMORY_BASE; sc->ranges[0].size = PCI_MEMORY_SIZE; sc->ranges[0].phys_base = sc->sli_window_base + SLI_PCI_OFFSET + sc->ranges[0].pci_base; rman_manage_region(&sc->mem_rman, sc->ranges[0].phys_base, sc->ranges[0].phys_base + sc->ranges[0].size - 1); /* Fill IO window */ sc->ranges[1].pci_base = PCI_IO_BASE; sc->ranges[1].size = PCI_IO_SIZE; sc->ranges[1].phys_base = sc->sli_window_base + SLI_PCI_OFFSET + sc->ranges[1].pci_base; rman_manage_region(&sc->io_rman, sc->ranges[1].phys_base, sc->ranges[1].phys_base + sc->ranges[1].size - 1); if (thunder_pem_init(sc)) { device_printf(dev, "Failure during PEM init\n"); goto fail_io; } device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); fail_io: rman_fini(&sc->io_rman); fail_mem: rman_fini(&sc->mem_rman); fail: bus_free_resource(dev, SYS_RES_MEMORY, sc->reg); return (ENXIO); } static void thunder_pem_release_all(device_t dev) { struct thunder_pem_softc *sc; sc = device_get_softc(dev); rman_fini(&sc->io_rman); rman_fini(&sc->mem_rman); if (sc->reg != NULL) bus_free_resource(dev, SYS_RES_MEMORY, sc->reg); } static int thunder_pem_detach(device_t dev) { thunder_pem_release_all(dev); return (0); } static devclass_t thunder_pem_devclass; DRIVER_MODULE(thunder_pem, pci, thunder_pem_driver, thunder_pem_devclass, 0, 0); MODULE_DEPEND(thunder_pem, pci, 1, 1, 1); Index: projects/clang370-import/sys/cam/ctl/ctl.c =================================================================== --- projects/clang370-import/sys/cam/ctl/ctl.c (revision 288125) +++ projects/clang370-import/sys/cam/ctl/ctl.c (revision 288126) @@ -1,13547 +1,13597 @@ /*- * Copyright (c) 2003-2009 Silicon Graphics International Corp. * Copyright (c) 2012 The FreeBSD Foundation * Copyright (c) 2015 Alexander Motin * All rights reserved. * * Portions of this software were developed by Edward Tomasz Napierala * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id$ */ /* * CAM Target Layer, a SCSI device emulation subsystem. 
* * Author: Ken Merry */ #define _CTL_C #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct ctl_softc *control_softc = NULL; /* * Template mode pages. */ /* * Note that these are default values only. The actual values will be * filled in when the user does a mode sense. */ const static struct copan_debugconf_subpage debugconf_page_default = { DBGCNF_PAGE_CODE | SMPH_SPF, /* page_code */ DBGCNF_SUBPAGE_CODE, /* subpage */ {(sizeof(struct copan_debugconf_subpage) - 4) >> 8, (sizeof(struct copan_debugconf_subpage) - 4) >> 0}, /* page_length */ DBGCNF_VERSION, /* page_version */ {CTL_TIME_IO_DEFAULT_SECS>>8, CTL_TIME_IO_DEFAULT_SECS>>0}, /* ctl_time_io_secs */ }; const static struct copan_debugconf_subpage debugconf_page_changeable = { DBGCNF_PAGE_CODE | SMPH_SPF, /* page_code */ DBGCNF_SUBPAGE_CODE, /* subpage */ {(sizeof(struct copan_debugconf_subpage) - 4) >> 8, (sizeof(struct copan_debugconf_subpage) - 4) >> 0}, /* page_length */ 0, /* page_version */ {0xff,0xff}, /* ctl_time_io_secs */ }; const static struct scsi_da_rw_recovery_page rw_er_page_default = { /*page_code*/SMS_RW_ERROR_RECOVERY_PAGE, /*page_length*/sizeof(struct scsi_da_rw_recovery_page) - 2, /*byte3*/SMS_RWER_AWRE|SMS_RWER_ARRE, /*read_retry_count*/0, /*correction_span*/0, /*head_offset_count*/0, /*data_strobe_offset_cnt*/0, /*byte8*/SMS_RWER_LBPERE, /*write_retry_count*/0, /*reserved2*/0, /*recovery_time_limit*/{0, 0}, }; const static struct scsi_da_rw_recovery_page rw_er_page_changeable = { /*page_code*/SMS_RW_ERROR_RECOVERY_PAGE, /*page_length*/sizeof(struct scsi_da_rw_recovery_page) - 2, /*byte3*/0, /*read_retry_count*/0, /*correction_span*/0, /*head_offset_count*/0, /*data_strobe_offset_cnt*/0, /*byte8*/0, /*write_retry_count*/0, /*reserved2*/0, /*recovery_time_limit*/{0, 0}, }; const static struct scsi_format_page format_page_default = { /*page_code*/SMS_FORMAT_DEVICE_PAGE, /*page_length*/sizeof(struct scsi_format_page) - 2, /*tracks_per_zone*/ {0, 0}, /*alt_sectors_per_zone*/ {0, 0}, /*alt_tracks_per_zone*/ {0, 0}, /*alt_tracks_per_lun*/ {0, 0}, /*sectors_per_track*/ {(CTL_DEFAULT_SECTORS_PER_TRACK >> 8) & 0xff, CTL_DEFAULT_SECTORS_PER_TRACK & 0xff}, /*bytes_per_sector*/ {0, 0}, /*interleave*/ {0, 0}, /*track_skew*/ {0, 0}, /*cylinder_skew*/ {0, 0}, /*flags*/ SFP_HSEC, /*reserved*/ {0, 0, 0} }; const static struct scsi_format_page format_page_changeable = { /*page_code*/SMS_FORMAT_DEVICE_PAGE, /*page_length*/sizeof(struct scsi_format_page) - 2, /*tracks_per_zone*/ {0, 0}, /*alt_sectors_per_zone*/ {0, 0}, /*alt_tracks_per_zone*/ {0, 0}, /*alt_tracks_per_lun*/ {0, 0}, /*sectors_per_track*/ {0, 0}, /*bytes_per_sector*/ {0, 0}, /*interleave*/ {0, 0}, /*track_skew*/ {0, 0}, /*cylinder_skew*/ {0, 0}, /*flags*/ 0, /*reserved*/ {0, 0, 0} }; const static struct scsi_rigid_disk_page rigid_disk_page_default = { /*page_code*/SMS_RIGID_DISK_PAGE, /*page_length*/sizeof(struct scsi_rigid_disk_page) - 2, /*cylinders*/ {0, 0, 0}, /*heads*/ CTL_DEFAULT_HEADS, /*start_write_precomp*/ {0, 0, 0}, /*start_reduced_current*/ {0, 0, 0}, /*step_rate*/ {0, 0}, /*landing_zone_cylinder*/ {0, 0, 0}, /*rpl*/ SRDP_RPL_DISABLED, /*rotational_offset*/ 0, /*reserved1*/ 0, /*rotation_rate*/ {(CTL_DEFAULT_ROTATION_RATE >> 8) & 0xff, CTL_DEFAULT_ROTATION_RATE 
& 0xff}, /*reserved2*/ {0, 0} }; const static struct scsi_rigid_disk_page rigid_disk_page_changeable = { /*page_code*/SMS_RIGID_DISK_PAGE, /*page_length*/sizeof(struct scsi_rigid_disk_page) - 2, /*cylinders*/ {0, 0, 0}, /*heads*/ 0, /*start_write_precomp*/ {0, 0, 0}, /*start_reduced_current*/ {0, 0, 0}, /*step_rate*/ {0, 0}, /*landing_zone_cylinder*/ {0, 0, 0}, /*rpl*/ 0, /*rotational_offset*/ 0, /*reserved1*/ 0, /*rotation_rate*/ {0, 0}, /*reserved2*/ {0, 0} }; const static struct scsi_caching_page caching_page_default = { /*page_code*/SMS_CACHING_PAGE, /*page_length*/sizeof(struct scsi_caching_page) - 2, /*flags1*/ SCP_DISC | SCP_WCE, /*ret_priority*/ 0, /*disable_pf_transfer_len*/ {0xff, 0xff}, /*min_prefetch*/ {0, 0}, /*max_prefetch*/ {0xff, 0xff}, /*max_pf_ceiling*/ {0xff, 0xff}, /*flags2*/ 0, /*cache_segments*/ 0, /*cache_seg_size*/ {0, 0}, /*reserved*/ 0, /*non_cache_seg_size*/ {0, 0, 0} }; const static struct scsi_caching_page caching_page_changeable = { /*page_code*/SMS_CACHING_PAGE, /*page_length*/sizeof(struct scsi_caching_page) - 2, /*flags1*/ SCP_WCE | SCP_RCD, /*ret_priority*/ 0, /*disable_pf_transfer_len*/ {0, 0}, /*min_prefetch*/ {0, 0}, /*max_prefetch*/ {0, 0}, /*max_pf_ceiling*/ {0, 0}, /*flags2*/ 0, /*cache_segments*/ 0, /*cache_seg_size*/ {0, 0}, /*reserved*/ 0, /*non_cache_seg_size*/ {0, 0, 0} }; const static struct scsi_control_page control_page_default = { /*page_code*/SMS_CONTROL_MODE_PAGE, /*page_length*/sizeof(struct scsi_control_page) - 2, /*rlec*/0, /*queue_flags*/SCP_QUEUE_ALG_RESTRICTED, /*eca_and_aen*/0, /*flags4*/SCP_TAS, /*aen_holdoff_period*/{0, 0}, /*busy_timeout_period*/{0, 0}, /*extended_selftest_completion_time*/{0, 0} }; const static struct scsi_control_page control_page_changeable = { /*page_code*/SMS_CONTROL_MODE_PAGE, /*page_length*/sizeof(struct scsi_control_page) - 2, /*rlec*/SCP_DSENSE, /*queue_flags*/SCP_QUEUE_ALG_MASK, /*eca_and_aen*/SCP_SWP, /*flags4*/0, /*aen_holdoff_period*/{0, 0}, /*busy_timeout_period*/{0, 0}, /*extended_selftest_completion_time*/{0, 0} }; +#define CTL_CEM_LEN (sizeof(struct scsi_control_ext_page) - 4) + +const static struct scsi_control_ext_page control_ext_page_default = { + /*page_code*/SMS_CONTROL_MODE_PAGE | SMPH_SPF, + /*subpage_code*/0x01, + /*page_length*/{CTL_CEM_LEN >> 8, CTL_CEM_LEN}, + /*flags*/0, + /*prio*/0, + /*max_sense*/0 +}; + +const static struct scsi_control_ext_page control_ext_page_changeable = { + /*page_code*/SMS_CONTROL_MODE_PAGE | SMPH_SPF, + /*subpage_code*/0x01, + /*page_length*/{CTL_CEM_LEN >> 8, CTL_CEM_LEN}, + /*flags*/0, + /*prio*/0, + /*max_sense*/0 +}; + const static struct scsi_info_exceptions_page ie_page_default = { /*page_code*/SMS_INFO_EXCEPTIONS_PAGE, /*page_length*/sizeof(struct scsi_info_exceptions_page) - 2, /*info_flags*/SIEP_FLAGS_DEXCPT, /*mrie*/0, /*interval_timer*/{0, 0, 0, 0}, /*report_count*/{0, 0, 0, 0} }; const static struct scsi_info_exceptions_page ie_page_changeable = { /*page_code*/SMS_INFO_EXCEPTIONS_PAGE, /*page_length*/sizeof(struct scsi_info_exceptions_page) - 2, /*info_flags*/0, /*mrie*/0, /*interval_timer*/{0, 0, 0, 0}, /*report_count*/{0, 0, 0, 0} }; #define CTL_LBPM_LEN (sizeof(struct ctl_logical_block_provisioning_page) - 4) const static struct ctl_logical_block_provisioning_page lbp_page_default = {{ /*page_code*/SMS_INFO_EXCEPTIONS_PAGE | SMPH_SPF, /*subpage_code*/0x02, /*page_length*/{CTL_LBPM_LEN >> 8, CTL_LBPM_LEN}, /*flags*/0, /*reserved*/{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /*descr*/{}}, {{/*flags*/0, /*resource*/0x01, /*reserved*/{0, 0}, /*count*/{0, 
0, 0, 0}}, {/*flags*/0, /*resource*/0x02, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}}, {/*flags*/0, /*resource*/0xf1, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}}, {/*flags*/0, /*resource*/0xf2, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}} } }; const static struct ctl_logical_block_provisioning_page lbp_page_changeable = {{ /*page_code*/SMS_INFO_EXCEPTIONS_PAGE | SMPH_SPF, /*subpage_code*/0x02, /*page_length*/{CTL_LBPM_LEN >> 8, CTL_LBPM_LEN}, /*flags*/0, /*reserved*/{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /*descr*/{}}, {{/*flags*/0, /*resource*/0, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}}, {/*flags*/0, /*resource*/0, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}}, {/*flags*/0, /*resource*/0, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}}, {/*flags*/0, /*resource*/0, /*reserved*/{0, 0}, /*count*/{0, 0, 0, 0}} } }; SYSCTL_NODE(_kern_cam, OID_AUTO, ctl, CTLFLAG_RD, 0, "CAM Target Layer"); static int worker_threads = -1; SYSCTL_INT(_kern_cam_ctl, OID_AUTO, worker_threads, CTLFLAG_RDTUN, &worker_threads, 1, "Number of worker threads"); static int ctl_debug = CTL_DEBUG_NONE; SYSCTL_INT(_kern_cam_ctl, OID_AUTO, debug, CTLFLAG_RWTUN, &ctl_debug, 0, "Enabled debug flags"); /* * Supported pages (0x00), Serial number (0x80), Device ID (0x83), * Extended INQUIRY Data (0x86), Mode Page Policy (0x87), * SCSI Ports (0x88), Third-party Copy (0x8F), Block limits (0xB0), * Block Device Characteristics (0xB1) and Logical Block Provisioning (0xB2) */ #define SCSI_EVPD_NUM_SUPPORTED_PAGES 10 static void ctl_isc_event_handler(ctl_ha_channel chanel, ctl_ha_event event, int param); static void ctl_copy_sense_data(union ctl_ha_msg *src, union ctl_io *dest); static void ctl_copy_sense_data_back(union ctl_io *src, union ctl_ha_msg *dest); static int ctl_init(void); void ctl_shutdown(void); static int ctl_open(struct cdev *dev, int flags, int fmt, struct thread *td); static int ctl_close(struct cdev *dev, int flags, int fmt, struct thread *td); static int ctl_serialize_other_sc_cmd(struct ctl_scsiio *ctsio); static int ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num, struct ctl_ooa *ooa_hdr, struct ctl_ooa_entry *kern_entries); static int ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td); static int ctl_alloc_lun(struct ctl_softc *ctl_softc, struct ctl_lun *lun, struct ctl_be_lun *be_lun); static int ctl_free_lun(struct ctl_lun *lun); static void ctl_create_lun(struct ctl_be_lun *be_lun); static struct ctl_port * ctl_io_port(struct ctl_io_hdr *io_hdr); static int ctl_do_mode_select(union ctl_io *io); static int ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key, uint64_t sa_res_key, uint8_t type, uint32_t residx, struct ctl_scsiio *ctsio, struct scsi_per_res_out *cdb, struct scsi_per_res_out_parms* param); static void ctl_pro_preempt_other(struct ctl_lun *lun, union ctl_ha_msg *msg); static void ctl_hndl_per_res_out_on_other_sc(union ctl_ha_msg *msg); static int ctl_inquiry_evpd_supported(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_serial(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_eid(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_mpp(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_scsi_ports(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd_bdc(struct ctl_scsiio *ctsio, int alloc_len); 
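[The default/changeable template pairs above follow the usual MODE SELECT convention: a bit set in the "changeable" copy marks a field the initiator is allowed to modify. A generic sketch of the validation this convention enables — not CTL's exact ctl_do_mode_select() logic, just the standard mask check:]

	/*
	 * Generic sketch: reject a MODE SELECT payload that flips any
	 * bit not flagged in the changeable mask.
	 */
	static int
	mode_select_ok_sketch(const uint8_t *new_pg, const uint8_t *cur_pg,
	    const uint8_t *chg_pg, int len)
	{
		int i;

		for (i = 0; i < len; i++) {
			if (((new_pg[i] ^ cur_pg[i]) & ~chg_pg[i]) != 0)
				return (0);
		}
		return (1);
	}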
static int ctl_inquiry_evpd_lbp(struct ctl_scsiio *ctsio, int alloc_len); static int ctl_inquiry_evpd(struct ctl_scsiio *ctsio); static int ctl_inquiry_std(struct ctl_scsiio *ctsio); static int ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len); static ctl_action ctl_extent_check(union ctl_io *io1, union ctl_io *io2, bool seq); static ctl_action ctl_extent_check_seq(union ctl_io *io1, union ctl_io *io2); static ctl_action ctl_check_for_blockage(struct ctl_lun *lun, union ctl_io *pending_io, union ctl_io *ooa_io); static ctl_action ctl_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io, union ctl_io *starting_io); static int ctl_check_blocked(struct ctl_lun *lun); static int ctl_scsiio_lun_check(struct ctl_lun *lun, const struct ctl_cmd_entry *entry, struct ctl_scsiio *ctsio); static void ctl_failover_lun(struct ctl_lun *lun); static int ctl_scsiio_precheck(struct ctl_softc *ctl_softc, struct ctl_scsiio *ctsio); static int ctl_scsiio(struct ctl_scsiio *ctsio); static int ctl_bus_reset(struct ctl_softc *ctl_softc, union ctl_io *io); static int ctl_target_reset(struct ctl_softc *ctl_softc, union ctl_io *io, ctl_ua_type ua_type); static int ctl_do_lun_reset(struct ctl_lun *lun, union ctl_io *io, ctl_ua_type ua_type); static int ctl_lun_reset(struct ctl_softc *ctl_softc, union ctl_io *io); static int ctl_abort_task(union ctl_io *io); static int ctl_abort_task_set(union ctl_io *io); static int ctl_query_task(union ctl_io *io, int task_set); static int ctl_i_t_nexus_reset(union ctl_io *io); static int ctl_query_async_event(union ctl_io *io); static void ctl_run_task(union ctl_io *io); #ifdef CTL_IO_DELAY static void ctl_datamove_timer_wakeup(void *arg); static void ctl_done_timer_wakeup(void *arg); #endif /* CTL_IO_DELAY */ static void ctl_send_datamove_done(union ctl_io *io, int have_lock); static void ctl_datamove_remote_write_cb(struct ctl_ha_dt_req *rq); static int ctl_datamove_remote_dm_write_cb(union ctl_io *io); static void ctl_datamove_remote_write(union ctl_io *io); static int ctl_datamove_remote_dm_read_cb(union ctl_io *io); static void ctl_datamove_remote_read_cb(struct ctl_ha_dt_req *rq); static int ctl_datamove_remote_sgl_setup(union ctl_io *io); static int ctl_datamove_remote_xfer(union ctl_io *io, unsigned command, ctl_ha_dt_cb callback); static void ctl_datamove_remote_read(union ctl_io *io); static void ctl_datamove_remote(union ctl_io *io); static int ctl_process_done(union ctl_io *io); static void ctl_lun_thread(void *arg); static void ctl_thresh_thread(void *arg); static void ctl_work_thread(void *arg); static void ctl_enqueue_incoming(union ctl_io *io); static void ctl_enqueue_rtr(union ctl_io *io); static void ctl_enqueue_done(union ctl_io *io); static void ctl_enqueue_isc(union ctl_io *io); static const struct ctl_cmd_entry * ctl_get_cmd_entry(struct ctl_scsiio *ctsio, int *sa); static const struct ctl_cmd_entry * ctl_validate_command(struct ctl_scsiio *ctsio); static int ctl_cmd_applicable(uint8_t lun_type, const struct ctl_cmd_entry *entry); static uint64_t ctl_get_prkey(struct ctl_lun *lun, uint32_t residx); static void ctl_clr_prkey(struct ctl_lun *lun, uint32_t residx); static void ctl_alloc_prkey(struct ctl_lun *lun, uint32_t residx); static void ctl_set_prkey(struct ctl_lun *lun, uint32_t residx, uint64_t key); /* * Load the serialization table. This isn't very pretty, but is probably * the easiest way to do it. */ #include "ctl_ser_table.c" /* * We only need to define open, close and ioctl routines for this driver. 
*/ static struct cdevsw ctl_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = ctl_open, .d_close = ctl_close, .d_ioctl = ctl_ioctl, .d_name = "ctl", }; MALLOC_DEFINE(M_CTL, "ctlmem", "Memory used for CTL"); static int ctl_module_event_handler(module_t, int /*modeventtype_t*/, void *); static moduledata_t ctl_moduledata = { "ctl", ctl_module_event_handler, NULL }; DECLARE_MODULE(ctl, ctl_moduledata, SI_SUB_CONFIGURE, SI_ORDER_THIRD); MODULE_VERSION(ctl, 1); static struct ctl_frontend ha_frontend = { .name = "ha", }; static void ctl_isc_handler_finish_xfer(struct ctl_softc *ctl_softc, union ctl_ha_msg *msg_info) { struct ctl_scsiio *ctsio; if (msg_info->hdr.original_sc == NULL) { printf("%s: original_sc == NULL!\n", __func__); /* XXX KDM now what? */ return; } ctsio = &msg_info->hdr.original_sc->scsiio; ctsio->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; ctsio->io_hdr.msg_type = CTL_MSG_FINISH_IO; ctsio->io_hdr.status = msg_info->hdr.status; ctsio->scsi_status = msg_info->scsi.scsi_status; ctsio->sense_len = msg_info->scsi.sense_len; ctsio->sense_residual = msg_info->scsi.sense_residual; ctsio->residual = msg_info->scsi.residual; memcpy(&ctsio->sense_data, &msg_info->scsi.sense_data, msg_info->scsi.sense_len); ctl_enqueue_isc((union ctl_io *)ctsio); } static void ctl_isc_handler_finish_ser_only(struct ctl_softc *ctl_softc, union ctl_ha_msg *msg_info) { struct ctl_scsiio *ctsio; if (msg_info->hdr.serializing_sc == NULL) { printf("%s: serializing_sc == NULL!\n", __func__); /* XXX KDM now what? */ return; } ctsio = &msg_info->hdr.serializing_sc->scsiio; ctsio->io_hdr.msg_type = CTL_MSG_FINISH_IO; ctl_enqueue_isc((union ctl_io *)ctsio); } void ctl_isc_announce_lun(struct ctl_lun *lun) { struct ctl_softc *softc = lun->ctl_softc; union ctl_ha_msg *msg; struct ctl_ha_msg_lun_pr_key pr_key; int i, k; if (softc->ha_link != CTL_HA_LINK_ONLINE) return; mtx_lock(&lun->lun_lock); i = sizeof(msg->lun); if (lun->lun_devid) i += lun->lun_devid->len; i += sizeof(pr_key) * lun->pr_key_count; alloc: mtx_unlock(&lun->lun_lock); msg = malloc(i, M_CTL, M_WAITOK); mtx_lock(&lun->lun_lock); k = sizeof(msg->lun); if (lun->lun_devid) k += lun->lun_devid->len; k += sizeof(pr_key) * lun->pr_key_count; if (i < k) { free(msg, M_CTL); i = k; goto alloc; } bzero(&msg->lun, sizeof(msg->lun)); msg->hdr.msg_type = CTL_MSG_LUN_SYNC; msg->hdr.nexus.targ_lun = lun->lun; msg->hdr.nexus.targ_mapped_lun = lun->lun; msg->lun.flags = lun->flags; msg->lun.pr_generation = lun->PRGeneration; msg->lun.pr_res_idx = lun->pr_res_idx; msg->lun.pr_res_type = lun->res_type; msg->lun.pr_key_count = lun->pr_key_count; i = 0; if (lun->lun_devid) { msg->lun.lun_devid_len = lun->lun_devid->len; memcpy(&msg->lun.data[i], lun->lun_devid->data, msg->lun.lun_devid_len); i += msg->lun.lun_devid_len; } for (k = 0; k < CTL_MAX_INITIATORS; k++) { if ((pr_key.pr_key = ctl_get_prkey(lun, k)) == 0) continue; pr_key.pr_iid = k; memcpy(&msg->lun.data[i], &pr_key, sizeof(pr_key)); i += sizeof(pr_key); } mtx_unlock(&lun->lun_lock); ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg->port, sizeof(msg->port) + i, M_WAITOK); free(msg, M_CTL); } void ctl_isc_announce_port(struct ctl_port *port) { struct ctl_softc *softc = control_softc; union ctl_ha_msg *msg; int i; if (port->targ_port < softc->port_min || port->targ_port >= softc->port_max || softc->ha_link != CTL_HA_LINK_ONLINE) return; i = sizeof(msg->port) + strlen(port->port_name) + 1; if (port->lun_map) i += sizeof(uint32_t) * CTL_MAX_LUNS; if (port->port_devid) i += port->port_devid->len; if (port->target_devid) i += 
port->target_devid->len; if (port->init_devid) i += port->init_devid->len; msg = malloc(i, M_CTL, M_WAITOK); bzero(&msg->port, sizeof(msg->port)); msg->hdr.msg_type = CTL_MSG_PORT_SYNC; msg->hdr.nexus.targ_port = port->targ_port; msg->port.port_type = port->port_type; msg->port.physical_port = port->physical_port; msg->port.virtual_port = port->virtual_port; msg->port.status = port->status; i = 0; msg->port.name_len = sprintf(&msg->port.data[i], "%d:%s", softc->ha_id, port->port_name) + 1; i += msg->port.name_len; if (port->lun_map) { msg->port.lun_map_len = sizeof(uint32_t) * CTL_MAX_LUNS; memcpy(&msg->port.data[i], port->lun_map, msg->port.lun_map_len); i += msg->port.lun_map_len; } if (port->port_devid) { msg->port.port_devid_len = port->port_devid->len; memcpy(&msg->port.data[i], port->port_devid->data, msg->port.port_devid_len); i += msg->port.port_devid_len; } if (port->target_devid) { msg->port.target_devid_len = port->target_devid->len; memcpy(&msg->port.data[i], port->target_devid->data, msg->port.target_devid_len); i += msg->port.target_devid_len; } if (port->init_devid) { msg->port.init_devid_len = port->init_devid->len; memcpy(&msg->port.data[i], port->init_devid->data, msg->port.init_devid_len); i += msg->port.init_devid_len; } ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg->port, sizeof(msg->port) + i, M_WAITOK); free(msg, M_CTL); } void ctl_isc_announce_iid(struct ctl_port *port, int iid) { struct ctl_softc *softc = control_softc; union ctl_ha_msg *msg; int i, l; if (port->targ_port < softc->port_min || port->targ_port >= softc->port_max || softc->ha_link != CTL_HA_LINK_ONLINE) return; mtx_lock(&softc->ctl_lock); i = sizeof(msg->iid); l = 0; if (port->wwpn_iid[iid].name) l = strlen(port->wwpn_iid[iid].name) + 1; i += l; msg = malloc(i, M_CTL, M_NOWAIT); if (msg == NULL) { mtx_unlock(&softc->ctl_lock); return; } bzero(&msg->iid, sizeof(msg->iid)); msg->hdr.msg_type = CTL_MSG_IID_SYNC; msg->hdr.nexus.targ_port = port->targ_port; msg->hdr.nexus.initid = iid; msg->iid.in_use = port->wwpn_iid[iid].in_use; msg->iid.name_len = l; msg->iid.wwpn = port->wwpn_iid[iid].wwpn; if (port->wwpn_iid[iid].name) strlcpy(msg->iid.data, port->wwpn_iid[iid].name, l); mtx_unlock(&softc->ctl_lock); ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg->iid, i, M_NOWAIT); free(msg, M_CTL); } static void ctl_isc_ha_link_up(struct ctl_softc *softc) { struct ctl_port *port; struct ctl_lun *lun; int i; STAILQ_FOREACH(port, &softc->port_list, links) { ctl_isc_announce_port(port); for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) { if (port->wwpn_iid[i].in_use) ctl_isc_announce_iid(port, i); } } STAILQ_FOREACH(lun, &softc->lun_list, links) ctl_isc_announce_lun(lun); } static void ctl_isc_ha_link_down(struct ctl_softc *softc) { struct ctl_port *port; struct ctl_lun *lun; union ctl_io *io; int i; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(lun, &softc->lun_list, links) { mtx_lock(&lun->lun_lock); if (lun->flags & CTL_LUN_PEER_SC_PRIMARY) { lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY; ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE); } mtx_unlock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); io = ctl_alloc_io(softc->othersc_pool); mtx_lock(&softc->ctl_lock); ctl_zero_io(io); io->io_hdr.msg_type = CTL_MSG_FAILOVER; io->io_hdr.nexus.targ_mapped_lun = lun->lun; ctl_enqueue_isc(io); } STAILQ_FOREACH(port, &softc->port_list, links) { if (port->targ_port >= softc->port_min && port->targ_port < softc->port_max) continue; port->status &= ~CTL_PORT_STATUS_ONLINE; for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) { port->wwpn_iid[i].in_use = 0; 
free(port->wwpn_iid[i].name, M_CTL); port->wwpn_iid[i].name = NULL; } } mtx_unlock(&softc->ctl_lock); } static void ctl_isc_ua(struct ctl_softc *softc, union ctl_ha_msg *msg, int len) { struct ctl_lun *lun; uint32_t iid = ctl_get_initindex(&msg->hdr.nexus); mtx_lock(&softc->ctl_lock); if (msg->hdr.nexus.targ_lun < CTL_MAX_LUNS && (lun = softc->ctl_luns[msg->hdr.nexus.targ_mapped_lun]) != NULL) { mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); if (msg->ua.ua_type == CTL_UA_THIN_PROV_THRES && msg->ua.ua_set) memcpy(lun->ua_tpt_info, msg->ua.ua_info, 8); if (msg->ua.ua_all) { if (msg->ua.ua_set) ctl_est_ua_all(lun, iid, msg->ua.ua_type); else ctl_clr_ua_all(lun, iid, msg->ua.ua_type); } else { if (msg->ua.ua_set) ctl_est_ua(lun, iid, msg->ua.ua_type); else ctl_clr_ua(lun, iid, msg->ua.ua_type); } mtx_unlock(&lun->lun_lock); } else mtx_unlock(&softc->ctl_lock); } static void ctl_isc_lun_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len) { struct ctl_lun *lun; struct ctl_ha_msg_lun_pr_key pr_key; int i, k; ctl_lun_flags oflags; uint32_t targ_lun; targ_lun = msg->hdr.nexus.targ_mapped_lun; mtx_lock(&softc->ctl_lock); if ((targ_lun >= CTL_MAX_LUNS) || ((lun = softc->ctl_luns[targ_lun]) == NULL)) { mtx_unlock(&softc->ctl_lock); return; } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); if (lun->flags & CTL_LUN_DISABLED) { mtx_unlock(&lun->lun_lock); return; } i = (lun->lun_devid != NULL) ? lun->lun_devid->len : 0; if (msg->lun.lun_devid_len != i || (i > 0 && memcmp(&msg->lun.data[0], lun->lun_devid->data, i) != 0)) { mtx_unlock(&lun->lun_lock); printf("%s: Received conflicting HA LUN %d\n", __func__, msg->hdr.nexus.targ_lun); return; } else { /* Record whether peer is primary. */ oflags = lun->flags; if ((msg->lun.flags & CTL_LUN_PRIMARY_SC) && (msg->lun.flags & CTL_LUN_DISABLED) == 0) lun->flags |= CTL_LUN_PEER_SC_PRIMARY; else lun->flags &= ~CTL_LUN_PEER_SC_PRIMARY; if (oflags != lun->flags) ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE); /* If peer is primary and we are not -- use data */ if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 && (lun->flags & CTL_LUN_PEER_SC_PRIMARY)) { lun->PRGeneration = msg->lun.pr_generation; lun->pr_res_idx = msg->lun.pr_res_idx; lun->res_type = msg->lun.pr_res_type; lun->pr_key_count = msg->lun.pr_key_count; for (k = 0; k < CTL_MAX_INITIATORS; k++) ctl_clr_prkey(lun, k); for (k = 0; k < msg->lun.pr_key_count; k++) { memcpy(&pr_key, &msg->lun.data[i], sizeof(pr_key)); ctl_alloc_prkey(lun, pr_key.pr_iid); ctl_set_prkey(lun, pr_key.pr_iid, pr_key.pr_key); i += sizeof(pr_key); } } mtx_unlock(&lun->lun_lock); CTL_DEBUG_PRINT(("%s: Known LUN %d, peer is %s\n", __func__, msg->hdr.nexus.targ_lun, (msg->lun.flags & CTL_LUN_PRIMARY_SC) ? 
"primary" : "secondary")); /* If we are primary but peer doesn't know -- notify */ if ((lun->flags & CTL_LUN_PRIMARY_SC) && (msg->lun.flags & CTL_LUN_PEER_SC_PRIMARY) == 0) ctl_isc_announce_lun(lun); } } static void ctl_isc_port_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len) { struct ctl_port *port; struct ctl_lun *lun; int i, new; port = softc->ctl_ports[msg->hdr.nexus.targ_port]; if (port == NULL) { CTL_DEBUG_PRINT(("%s: New port %d\n", __func__, msg->hdr.nexus.targ_port)); new = 1; port = malloc(sizeof(*port), M_CTL, M_WAITOK | M_ZERO); port->frontend = &ha_frontend; port->targ_port = msg->hdr.nexus.targ_port; } else if (port->frontend == &ha_frontend) { CTL_DEBUG_PRINT(("%s: Updated port %d\n", __func__, msg->hdr.nexus.targ_port)); new = 0; } else { printf("%s: Received conflicting HA port %d\n", __func__, msg->hdr.nexus.targ_port); return; } port->port_type = msg->port.port_type; port->physical_port = msg->port.physical_port; port->virtual_port = msg->port.virtual_port; port->status = msg->port.status; i = 0; free(port->port_name, M_CTL); port->port_name = strndup(&msg->port.data[i], msg->port.name_len, M_CTL); i += msg->port.name_len; if (msg->port.lun_map_len != 0) { if (port->lun_map == NULL) port->lun_map = malloc(sizeof(uint32_t) * CTL_MAX_LUNS, M_CTL, M_WAITOK); memcpy(port->lun_map, &msg->port.data[i], sizeof(uint32_t) * CTL_MAX_LUNS); i += msg->port.lun_map_len; } else { free(port->lun_map, M_CTL); port->lun_map = NULL; } if (msg->port.port_devid_len != 0) { if (port->port_devid == NULL || port->port_devid->len != msg->port.port_devid_len) { free(port->port_devid, M_CTL); port->port_devid = malloc(sizeof(struct ctl_devid) + msg->port.port_devid_len, M_CTL, M_WAITOK); } memcpy(port->port_devid->data, &msg->port.data[i], msg->port.port_devid_len); port->port_devid->len = msg->port.port_devid_len; i += msg->port.port_devid_len; } else { free(port->port_devid, M_CTL); port->port_devid = NULL; } if (msg->port.target_devid_len != 0) { if (port->target_devid == NULL || port->target_devid->len != msg->port.target_devid_len) { free(port->target_devid, M_CTL); port->target_devid = malloc(sizeof(struct ctl_devid) + msg->port.target_devid_len, M_CTL, M_WAITOK); } memcpy(port->target_devid->data, &msg->port.data[i], msg->port.target_devid_len); port->target_devid->len = msg->port.target_devid_len; i += msg->port.target_devid_len; } else { free(port->target_devid, M_CTL); port->target_devid = NULL; } if (msg->port.init_devid_len != 0) { if (port->init_devid == NULL || port->init_devid->len != msg->port.init_devid_len) { free(port->init_devid, M_CTL); port->init_devid = malloc(sizeof(struct ctl_devid) + msg->port.init_devid_len, M_CTL, M_WAITOK); } memcpy(port->init_devid->data, &msg->port.data[i], msg->port.init_devid_len); port->init_devid->len = msg->port.init_devid_len; i += msg->port.init_devid_len; } else { free(port->init_devid, M_CTL); port->init_devid = NULL; } if (new) { if (ctl_port_register(port) != 0) { printf("%s: ctl_port_register() failed with error\n", __func__); } } mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(lun, &softc->lun_list, links) { if (ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS) continue; mtx_lock(&lun->lun_lock); ctl_est_ua_all(lun, -1, CTL_UA_INQ_CHANGE); mtx_unlock(&lun->lun_lock); } mtx_unlock(&softc->ctl_lock); } static void ctl_isc_iid_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len) { struct ctl_port *port; int iid; port = softc->ctl_ports[msg->hdr.nexus.targ_port]; if (port == NULL) { printf("%s: Received IID for unknown 
port %d\n", __func__, msg->hdr.nexus.targ_port); return; } iid = msg->hdr.nexus.initid; port->wwpn_iid[iid].in_use = msg->iid.in_use; port->wwpn_iid[iid].wwpn = msg->iid.wwpn; free(port->wwpn_iid[iid].name, M_CTL); if (msg->iid.name_len) { port->wwpn_iid[iid].name = strndup(&msg->iid.data[0], msg->iid.name_len, M_CTL); } else port->wwpn_iid[iid].name = NULL; } /* * ISC (Inter Shelf Communication) event handler. Events from the HA * subsystem come in here. */ static void ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param) { struct ctl_softc *softc; union ctl_io *io; struct ctl_prio *presio; ctl_ha_status isc_status; softc = control_softc; CTL_DEBUG_PRINT(("CTL: Isc Msg event %d\n", event)); if (event == CTL_HA_EVT_MSG_RECV) { union ctl_ha_msg *msg, msgbuf; if (param > sizeof(msgbuf)) msg = malloc(param, M_CTL, M_WAITOK); else msg = &msgbuf; isc_status = ctl_ha_msg_recv(CTL_HA_CHAN_CTL, msg, param, M_WAITOK); if (isc_status != CTL_HA_STATUS_SUCCESS) { printf("%s: Error receiving message: %d\n", __func__, isc_status); if (msg != &msgbuf) free(msg, M_CTL); return; } CTL_DEBUG_PRINT(("CTL: msg_type %d\n", msg->msg_type)); switch (msg->hdr.msg_type) { case CTL_MSG_SERIALIZE: io = ctl_alloc_io(softc->othersc_pool); ctl_zero_io(io); // populate ctsio from msg io->io_hdr.io_type = CTL_IO_SCSI; io->io_hdr.msg_type = CTL_MSG_SERIALIZE; io->io_hdr.original_sc = msg->hdr.original_sc; io->io_hdr.flags |= CTL_FLAG_FROM_OTHER_SC | CTL_FLAG_IO_ACTIVE; /* * If we're in serialization-only mode, we don't * want to go through full done processing. Thus * the COPY flag. * * XXX KDM add another flag that is more specific. */ if (softc->ha_mode != CTL_HA_MODE_XFER) io->io_hdr.flags |= CTL_FLAG_INT_COPY; io->io_hdr.nexus = msg->hdr.nexus; #if 0 printf("port %u, iid %u, lun %u\n", io->io_hdr.nexus.targ_port, io->io_hdr.nexus.initid, io->io_hdr.nexus.targ_lun); #endif io->scsiio.tag_num = msg->scsi.tag_num; io->scsiio.tag_type = msg->scsi.tag_type; #ifdef CTL_TIME_IO io->io_hdr.start_time = time_uptime; getbintime(&io->io_hdr.start_bt); #endif /* CTL_TIME_IO */ io->scsiio.cdb_len = msg->scsi.cdb_len; memcpy(io->scsiio.cdb, msg->scsi.cdb, CTL_MAX_CDBLEN); if (softc->ha_mode == CTL_HA_MODE_XFER) { const struct ctl_cmd_entry *entry; entry = ctl_get_cmd_entry(&io->scsiio, NULL); io->io_hdr.flags &= ~CTL_FLAG_DATA_MASK; io->io_hdr.flags |= entry->flags & CTL_FLAG_DATA_MASK; } ctl_enqueue_isc(io); break; /* Performed on the Originating SC, XFER mode only */ case CTL_MSG_DATAMOVE: { struct ctl_sg_entry *sgl; int i, j; io = msg->hdr.original_sc; if (io == NULL) { printf("%s: original_sc == NULL!\n", __func__); /* XXX KDM do something here */ break; } io->io_hdr.msg_type = CTL_MSG_DATAMOVE; io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; /* * Keep track of this, we need to send it back over * when the datamove is complete. 
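[The CTL_MSG_DATAMOVE handler below reassembles a scatter/gather list that may arrive split across several HA messages: the first message (sg_sequence == 0) sizes and allocates the whole list from kern_sg_entries, each message appends cur_sg_entries entries starting at sent_sg_entries, and sg_last marks the I/O ready to queue. A compact sketch of that protocol; struct sg_chunk is an illustrative stand-in for the msg->dt fields, not a CTL type:]

	struct sg_chunk {
		int	seq;	/* msg->dt.sg_sequence */
		int	first;	/* msg->dt.sent_sg_entries */
		int	count;	/* msg->dt.cur_sg_entries */
		int	total;	/* msg->dt.kern_sg_entries */
		int	last;	/* msg->dt.sg_last */
		struct ctl_sg_entry *ents;	/* msg->dt.sg_list */
	};

	static int	/* returns 1 once the full list is assembled */
	sg_reassemble_sketch(struct ctl_sg_entry **sglp,
	    const struct sg_chunk *c)
	{
		int i;

		if (c->seq == 0)	/* first chunk: allocate everything */
			*sglp = malloc(sizeof(**sglp) * c->total, M_CTL,
			    M_WAITOK | M_ZERO);
		for (i = 0; i < c->count; i++)
			(*sglp)[c->first + i] = c->ents[i];
		return (c->last != 0);
	}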
*/ io->io_hdr.serializing_sc = msg->hdr.serializing_sc; if (msg->hdr.status == CTL_SUCCESS) io->io_hdr.status = msg->hdr.status; if (msg->dt.sg_sequence == 0) { i = msg->dt.kern_sg_entries + msg->dt.kern_data_len / CTL_HA_DATAMOVE_SEGMENT + 1; sgl = malloc(sizeof(*sgl) * i, M_CTL, M_WAITOK | M_ZERO); io->io_hdr.remote_sglist = sgl; io->io_hdr.local_sglist = &sgl[msg->dt.kern_sg_entries]; io->scsiio.kern_data_ptr = (uint8_t *)sgl; io->scsiio.kern_sg_entries = msg->dt.kern_sg_entries; io->scsiio.rem_sg_entries = msg->dt.kern_sg_entries; io->scsiio.kern_data_len = msg->dt.kern_data_len; io->scsiio.kern_total_len = msg->dt.kern_total_len; io->scsiio.kern_data_resid = msg->dt.kern_data_resid; io->scsiio.kern_rel_offset = msg->dt.kern_rel_offset; io->io_hdr.flags &= ~CTL_FLAG_BUS_ADDR; io->io_hdr.flags |= msg->dt.flags & CTL_FLAG_BUS_ADDR; } else sgl = (struct ctl_sg_entry *) io->scsiio.kern_data_ptr; for (i = msg->dt.sent_sg_entries, j = 0; i < (msg->dt.sent_sg_entries + msg->dt.cur_sg_entries); i++, j++) { sgl[i].addr = msg->dt.sg_list[j].addr; sgl[i].len = msg->dt.sg_list[j].len; #if 0 printf("%s: DATAMOVE: %p,%lu j=%d, i=%d\n", __func__, sgl[i].addr, sgl[i].len, j, i); #endif } /* * If this is the last piece of the I/O, we've got * the full S/G list. Queue processing in the thread. * Otherwise wait for the next piece. */ if (msg->dt.sg_last != 0) ctl_enqueue_isc(io); break; } /* Performed on the Serializing (primary) SC, XFER mode only */ case CTL_MSG_DATAMOVE_DONE: { if (msg->hdr.serializing_sc == NULL) { printf("%s: serializing_sc == NULL!\n", __func__); /* XXX KDM now what? */ break; } /* * We grab the sense information here in case * there was a failure, so we can return status * back to the initiator. */ io = msg->hdr.serializing_sc; io->io_hdr.msg_type = CTL_MSG_DATAMOVE_DONE; io->io_hdr.flags &= ~CTL_FLAG_DMA_INPROG; io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; io->io_hdr.port_status = msg->scsi.fetd_status; io->scsiio.residual = msg->scsi.residual; if (msg->hdr.status != CTL_STATUS_NONE) { io->io_hdr.status = msg->hdr.status; io->scsiio.scsi_status = msg->scsi.scsi_status; io->scsiio.sense_len = msg->scsi.sense_len; io->scsiio.sense_residual =msg->scsi.sense_residual; memcpy(&io->scsiio.sense_data, &msg->scsi.sense_data, msg->scsi.sense_len); if (msg->hdr.status == CTL_SUCCESS) io->io_hdr.flags |= CTL_FLAG_STATUS_SENT; } ctl_enqueue_isc(io); break; } /* Preformed on Originating SC, SER_ONLY mode */ case CTL_MSG_R2R: io = msg->hdr.original_sc; if (io == NULL) { printf("%s: original_sc == NULL!\n", __func__); break; } io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; io->io_hdr.msg_type = CTL_MSG_R2R; io->io_hdr.serializing_sc = msg->hdr.serializing_sc; ctl_enqueue_isc(io); break; /* * Performed on Serializing(i.e. primary SC) SC in SER_ONLY * mode. * Performed on the Originating (i.e. secondary) SC in XFER * mode */ case CTL_MSG_FINISH_IO: if (softc->ha_mode == CTL_HA_MODE_XFER) ctl_isc_handler_finish_xfer(softc, msg); else ctl_isc_handler_finish_ser_only(softc, msg); break; /* Preformed on Originating SC */ case CTL_MSG_BAD_JUJU: io = msg->hdr.original_sc; if (io == NULL) { printf("%s: Bad JUJU!, original_sc is NULL!\n", __func__); break; } ctl_copy_sense_data(msg, io); /* * IO should have already been cleaned up on other * SC so clear this flag so we won't send a message * back to finish the IO there. 
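[Condensed from the case comments in this handler, a summary of which controller consumes each HA message type and in which ha_mode:]

	/*
	 * HA message flow (from the case comments above/below):
	 *   CTL_MSG_DATAMOVE       originating SC, XFER mode only
	 *   CTL_MSG_DATAMOVE_DONE  serializing (primary) SC, XFER mode only
	 *   CTL_MSG_R2R            originating SC, SER_ONLY mode
	 *   CTL_MSG_FINISH_IO      serializing SC (SER_ONLY) /
	 *                          originating SC (XFER)
	 *   CTL_MSG_BAD_JUJU       originating SC
	 */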
*/ io->io_hdr.flags &= ~CTL_FLAG_SENT_2OTHER_SC; io->io_hdr.flags |= CTL_FLAG_IO_ACTIVE; /* io = msg->hdr.serializing_sc; */ io->io_hdr.msg_type = CTL_MSG_BAD_JUJU; ctl_enqueue_isc(io); break; /* Handle resets sent from the other side */ case CTL_MSG_MANAGE_TASKS: { struct ctl_taskio *taskio; taskio = (struct ctl_taskio *)ctl_alloc_io( softc->othersc_pool); ctl_zero_io((union ctl_io *)taskio); taskio->io_hdr.io_type = CTL_IO_TASK; taskio->io_hdr.flags |= CTL_FLAG_FROM_OTHER_SC; taskio->io_hdr.nexus = msg->hdr.nexus; taskio->task_action = msg->task.task_action; taskio->tag_num = msg->task.tag_num; taskio->tag_type = msg->task.tag_type; #ifdef CTL_TIME_IO taskio->io_hdr.start_time = time_uptime; getbintime(&taskio->io_hdr.start_bt); #endif /* CTL_TIME_IO */ ctl_run_task((union ctl_io *)taskio); break; } /* Persistent Reserve action which needs attention */ case CTL_MSG_PERS_ACTION: presio = (struct ctl_prio *)ctl_alloc_io( softc->othersc_pool); ctl_zero_io((union ctl_io *)presio); presio->io_hdr.msg_type = CTL_MSG_PERS_ACTION; presio->io_hdr.flags |= CTL_FLAG_FROM_OTHER_SC; presio->io_hdr.nexus = msg->hdr.nexus; presio->pr_msg = msg->pr; ctl_enqueue_isc((union ctl_io *)presio); break; case CTL_MSG_UA: ctl_isc_ua(softc, msg, param); break; case CTL_MSG_PORT_SYNC: ctl_isc_port_sync(softc, msg, param); break; case CTL_MSG_LUN_SYNC: ctl_isc_lun_sync(softc, msg, param); break; case CTL_MSG_IID_SYNC: ctl_isc_iid_sync(softc, msg, param); break; default: printf("Received HA message of unknown type %d\n", msg->hdr.msg_type); break; } if (msg != &msgbuf) free(msg, M_CTL); } else if (event == CTL_HA_EVT_LINK_CHANGE) { printf("CTL: HA link status changed from %d to %d\n", softc->ha_link, param); if (param == softc->ha_link) return; if (softc->ha_link == CTL_HA_LINK_ONLINE) { softc->ha_link = param; ctl_isc_ha_link_down(softc); } else { softc->ha_link = param; if (softc->ha_link == CTL_HA_LINK_ONLINE) ctl_isc_ha_link_up(softc); } return; } else { printf("ctl_isc_event_handler: Unknown event %d\n", event); return; } } static void ctl_copy_sense_data(union ctl_ha_msg *src, union ctl_io *dest) { memcpy(&dest->scsiio.sense_data, &src->scsi.sense_data, src->scsi.sense_len); dest->scsiio.scsi_status = src->scsi.scsi_status; dest->scsiio.sense_len = src->scsi.sense_len; dest->io_hdr.status = src->hdr.status; } static void ctl_copy_sense_data_back(union ctl_io *src, union ctl_ha_msg *dest) { memcpy(&dest->scsi.sense_data, &src->scsiio.sense_data, src->scsiio.sense_len); dest->scsi.scsi_status = src->scsiio.scsi_status; dest->scsi.sense_len = src->scsiio.sense_len; dest->hdr.status = src->io_hdr.status; } void ctl_est_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua) { struct ctl_softc *softc = lun->ctl_softc; ctl_ua_type *pu; if (initidx < softc->init_min || initidx >= softc->init_max) return; mtx_assert(&lun->lun_lock, MA_OWNED); pu = lun->pending_ua[initidx / CTL_MAX_INIT_PER_PORT]; if (pu == NULL) return; pu[initidx % CTL_MAX_INIT_PER_PORT] |= ua; } void ctl_est_ua_port(struct ctl_lun *lun, int port, uint32_t except, ctl_ua_type ua) { int i; mtx_assert(&lun->lun_lock, MA_OWNED); if (lun->pending_ua[port] == NULL) return; for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) { if (port * CTL_MAX_INIT_PER_PORT + i == except) continue; lun->pending_ua[port][i] |= ua; } } void ctl_est_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua) { struct ctl_softc *softc = lun->ctl_softc; int i; mtx_assert(&lun->lun_lock, MA_OWNED); for (i = softc->port_min; i < softc->port_max; i++) ctl_est_ua_port(lun, i, except, 
ua); } void ctl_clr_ua(struct ctl_lun *lun, uint32_t initidx, ctl_ua_type ua) { struct ctl_softc *softc = lun->ctl_softc; ctl_ua_type *pu; if (initidx < softc->init_min || initidx >= softc->init_max) return; mtx_assert(&lun->lun_lock, MA_OWNED); pu = lun->pending_ua[initidx / CTL_MAX_INIT_PER_PORT]; if (pu == NULL) return; pu[initidx % CTL_MAX_INIT_PER_PORT] &= ~ua; } void ctl_clr_ua_all(struct ctl_lun *lun, uint32_t except, ctl_ua_type ua) { struct ctl_softc *softc = lun->ctl_softc; int i, j; mtx_assert(&lun->lun_lock, MA_OWNED); for (i = softc->port_min; i < softc->port_max; i++) { if (lun->pending_ua[i] == NULL) continue; for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) { if (i * CTL_MAX_INIT_PER_PORT + j == except) continue; lun->pending_ua[i][j] &= ~ua; } } } void ctl_clr_ua_allluns(struct ctl_softc *ctl_softc, uint32_t initidx, ctl_ua_type ua_type) { struct ctl_lun *lun; mtx_assert(&ctl_softc->ctl_lock, MA_OWNED); STAILQ_FOREACH(lun, &ctl_softc->lun_list, links) { mtx_lock(&lun->lun_lock); ctl_clr_ua(lun, initidx, ua_type); mtx_unlock(&lun->lun_lock); } } static int ctl_ha_role_sysctl(SYSCTL_HANDLER_ARGS) { struct ctl_softc *softc = (struct ctl_softc *)arg1; struct ctl_lun *lun; struct ctl_lun_req ireq; int error, value; value = (softc->flags & CTL_FLAG_ACTIVE_SHELF) ? 0 : 1; error = sysctl_handle_int(oidp, &value, 0, req); if ((error != 0) || (req->newptr == NULL)) return (error); mtx_lock(&softc->ctl_lock); if (value == 0) softc->flags |= CTL_FLAG_ACTIVE_SHELF; else softc->flags &= ~CTL_FLAG_ACTIVE_SHELF; STAILQ_FOREACH(lun, &softc->lun_list, links) { mtx_unlock(&softc->ctl_lock); bzero(&ireq, sizeof(ireq)); ireq.reqtype = CTL_LUNREQ_MODIFY; ireq.reqdata.modify.lun_id = lun->lun; lun->backend->ioctl(NULL, CTL_LUN_REQ, (caddr_t)&ireq, 0, curthread); if (ireq.status != CTL_LUN_OK) { printf("%s: CTL_LUNREQ_MODIFY returned %d '%s'\n", __func__, ireq.status, ireq.error_str); } mtx_lock(&softc->ctl_lock); } mtx_unlock(&softc->ctl_lock); return (0); } static int ctl_init(void) { struct ctl_softc *softc; void *other_pool; int i, error, retval; retval = 0; control_softc = malloc(sizeof(*control_softc), M_DEVBUF, M_WAITOK | M_ZERO); softc = control_softc; softc->dev = make_dev(&ctl_cdevsw, 0, UID_ROOT, GID_OPERATOR, 0600, "cam/ctl"); softc->dev->si_drv1 = softc; sysctl_ctx_init(&softc->sysctl_ctx); softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_kern_cam), OID_AUTO, "ctl", CTLFLAG_RD, 0, "CAM Target Layer"); if (softc->sysctl_tree == NULL) { printf("%s: unable to allocate sysctl tree\n", __func__); destroy_dev(softc->dev); free(control_softc, M_DEVBUF); control_softc = NULL; return (ENOMEM); } mtx_init(&softc->ctl_lock, "CTL mutex", NULL, MTX_DEF); softc->io_zone = uma_zcreate("CTL IO", sizeof(union ctl_io), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); softc->open_count = 0; /* * Default to actually sending a SYNCHRONIZE CACHE command down to * the drive. */ softc->flags = CTL_FLAG_REAL_SYNC; SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "ha_mode", CTLFLAG_RDTUN, (int *)&softc->ha_mode, 0, "HA mode (0 - act/stby, 1 - serialize only, 2 - xfer)"); /* * In Copan's HA scheme, the "master" and "slave" roles are * figured out through the slot the controller is in. Although it * is an active/active system, someone has to be in charge. 
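[The ha_id handling in ctl_init() below carves the global port space into per-head ranges, and the initiator index ranges follow by scaling with CTL_MAX_INIT_PER_PORT. A small worked sketch of the arithmetic; the concrete numbers in the comment are illustrative, not CTL's actual limits:]

	/*
	 * Sketch of the HA port/initiator split: e.g. with 256 ports
	 * and two port groups, head 1 owns ports [0, 128) and head 2
	 * owns ports [128, 256).
	 */
	static void
	ha_ranges_sketch(int ha_id, int max_ports, int groups,
	    int init_per_port)
	{
		int cnt = max_ports / groups;
		int pmin = (ha_id - 1) * cnt;
		int pmax = pmin + cnt;
		int imin = pmin * init_per_port;	/* first initiator */
		int imax = pmax * init_per_port;	/* one past last */

		printf("ports [%d,%d) inits [%d,%d)\n", pmin, pmax,
		    imin, imax);
	}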
*/ SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "ha_id", CTLFLAG_RDTUN, &softc->ha_id, 0, "HA head ID (0 - no HA)"); if (softc->ha_id == 0 || softc->ha_id > NUM_TARGET_PORT_GROUPS) { softc->flags |= CTL_FLAG_ACTIVE_SHELF; softc->is_single = 1; softc->port_cnt = CTL_MAX_PORTS; softc->port_min = 0; } else { softc->port_cnt = CTL_MAX_PORTS / NUM_TARGET_PORT_GROUPS; softc->port_min = (softc->ha_id - 1) * softc->port_cnt; } softc->port_max = softc->port_min + softc->port_cnt; softc->init_min = softc->port_min * CTL_MAX_INIT_PER_PORT; softc->init_max = softc->port_max * CTL_MAX_INIT_PER_PORT; SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "ha_link", CTLFLAG_RD, (int *)&softc->ha_link, 0, "HA link state (0 - offline, 1 - unknown, 2 - online)"); STAILQ_INIT(&softc->lun_list); STAILQ_INIT(&softc->pending_lun_queue); STAILQ_INIT(&softc->fe_list); STAILQ_INIT(&softc->port_list); STAILQ_INIT(&softc->be_list); ctl_tpc_init(softc); if (ctl_pool_create(softc, "othersc", CTL_POOL_ENTRIES_OTHER_SC, &other_pool) != 0) { printf("ctl: can't allocate %d entry other SC pool, " "exiting\n", CTL_POOL_ENTRIES_OTHER_SC); return (ENOMEM); } softc->othersc_pool = other_pool; if (worker_threads <= 0) worker_threads = max(1, mp_ncpus / 4); if (worker_threads > CTL_MAX_THREADS) worker_threads = CTL_MAX_THREADS; for (i = 0; i < worker_threads; i++) { struct ctl_thread *thr = &softc->threads[i]; mtx_init(&thr->queue_lock, "CTL queue mutex", NULL, MTX_DEF); thr->ctl_softc = softc; STAILQ_INIT(&thr->incoming_queue); STAILQ_INIT(&thr->rtr_queue); STAILQ_INIT(&thr->done_queue); STAILQ_INIT(&thr->isc_queue); error = kproc_kthread_add(ctl_work_thread, thr, &softc->ctl_proc, &thr->thread, 0, 0, "ctl", "work%d", i); if (error != 0) { printf("error creating CTL work thread!\n"); ctl_pool_free(other_pool); return (error); } } error = kproc_kthread_add(ctl_lun_thread, softc, &softc->ctl_proc, NULL, 0, 0, "ctl", "lun"); if (error != 0) { printf("error creating CTL lun thread!\n"); ctl_pool_free(other_pool); return (error); } error = kproc_kthread_add(ctl_thresh_thread, softc, &softc->ctl_proc, NULL, 0, 0, "ctl", "thresh"); if (error != 0) { printf("error creating CTL threshold thread!\n"); ctl_pool_free(other_pool); return (error); } SYSCTL_ADD_PROC(&softc->sysctl_ctx,SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "ha_role", CTLTYPE_INT | CTLFLAG_RWTUN, softc, 0, ctl_ha_role_sysctl, "I", "HA role for this head"); if (softc->is_single == 0) { ctl_frontend_register(&ha_frontend); if (ctl_ha_msg_init(softc) != CTL_HA_STATUS_SUCCESS) { printf("ctl_init: ctl_ha_msg_init failed.\n"); softc->is_single = 1; } else if (ctl_ha_msg_register(CTL_HA_CHAN_CTL, ctl_isc_event_handler) != CTL_HA_STATUS_SUCCESS) { printf("ctl_init: ctl_ha_msg_register failed.\n"); softc->is_single = 1; } } return (0); } void ctl_shutdown(void) { struct ctl_softc *softc; struct ctl_lun *lun, *next_lun; softc = (struct ctl_softc *)control_softc; if (softc->is_single == 0) { ctl_ha_msg_shutdown(softc); if (ctl_ha_msg_deregister(CTL_HA_CHAN_CTL) != CTL_HA_STATUS_SUCCESS) printf("%s: ctl_ha_msg_deregister failed.\n", __func__); if (ctl_ha_msg_destroy(softc) != CTL_HA_STATUS_SUCCESS) printf("%s: ctl_ha_msg_destroy failed.\n", __func__); ctl_frontend_deregister(&ha_frontend); } mtx_lock(&softc->ctl_lock); /* * Free up each LUN. 
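 * The loop below caches STAILQ_NEXT() in next_lun before each * ctl_free_lun() call, since freeing the LUN unlinks it from * lun_list and would otherwise invalidate the iterator.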
*/ for (lun = STAILQ_FIRST(&softc->lun_list); lun != NULL; lun = next_lun){ next_lun = STAILQ_NEXT(lun, links); ctl_free_lun(lun); } mtx_unlock(&softc->ctl_lock); #if 0 ctl_shutdown_thread(softc->work_thread); mtx_destroy(&softc->queue_lock); #endif ctl_tpc_shutdown(softc); uma_zdestroy(softc->io_zone); mtx_destroy(&softc->ctl_lock); destroy_dev(softc->dev); sysctl_ctx_free(&softc->sysctl_ctx); free(control_softc, M_DEVBUF); control_softc = NULL; } static int ctl_module_event_handler(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: return (ctl_init()); case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } } /* * XXX KDM should we do some access checks here? Bump a reference count to * prevent a CTL module from being unloaded while someone has it open? */ static int ctl_open(struct cdev *dev, int flags, int fmt, struct thread *td) { return (0); } static int ctl_close(struct cdev *dev, int flags, int fmt, struct thread *td) { return (0); } /* * Remove an initiator by port number and initiator ID. * Returns 0 for success, -1 for failure. */ int ctl_remove_initiator(struct ctl_port *port, int iid) { struct ctl_softc *softc = control_softc; mtx_assert(&softc->ctl_lock, MA_NOTOWNED); if (iid >= CTL_MAX_INIT_PER_PORT) { printf("%s: initiator ID %u > maximum %u!\n", __func__, iid, CTL_MAX_INIT_PER_PORT); return (-1); } mtx_lock(&softc->ctl_lock); port->wwpn_iid[iid].in_use--; port->wwpn_iid[iid].last_use = time_uptime; mtx_unlock(&softc->ctl_lock); ctl_isc_announce_iid(port, iid); return (0); } /* * Add an initiator to the initiator map. * Returns iid for success, < 0 for failure. */ int ctl_add_initiator(struct ctl_port *port, int iid, uint64_t wwpn, char *name) { struct ctl_softc *softc = control_softc; time_t best_time; int i, best; mtx_assert(&softc->ctl_lock, MA_NOTOWNED); if (iid >= CTL_MAX_INIT_PER_PORT) { printf("%s: WWPN %#jx initiator ID %u > maximum %u!\n", __func__, wwpn, iid, CTL_MAX_INIT_PER_PORT); free(name, M_CTL); return (-1); } mtx_lock(&softc->ctl_lock); if (iid < 0 && (wwpn != 0 || name != NULL)) { for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) { if (wwpn != 0 && wwpn == port->wwpn_iid[i].wwpn) { iid = i; break; } if (name != NULL && port->wwpn_iid[i].name != NULL && strcmp(name, port->wwpn_iid[i].name) == 0) { iid = i; break; } } } if (iid < 0) { for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) { if (port->wwpn_iid[i].in_use == 0 && port->wwpn_iid[i].wwpn == 0 && port->wwpn_iid[i].name == NULL) { iid = i; break; } } } if (iid < 0) { best = -1; best_time = INT32_MAX; for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) { if (port->wwpn_iid[i].in_use == 0) { if (port->wwpn_iid[i].last_use < best_time) { best = i; best_time = port->wwpn_iid[i].last_use; } } } iid = best; } if (iid < 0) { mtx_unlock(&softc->ctl_lock); free(name, M_CTL); return (-2); } if (port->wwpn_iid[iid].in_use > 0 && (wwpn != 0 || name != NULL)) { /* * This is not an error yet. */ if (wwpn != 0 && wwpn == port->wwpn_iid[iid].wwpn) { #if 0 printf("%s: port %d iid %u WWPN %#jx arrived" " again\n", __func__, port->targ_port, iid, (uintmax_t)wwpn); #endif goto take; } if (name != NULL && port->wwpn_iid[iid].name != NULL && strcmp(name, port->wwpn_iid[iid].name) == 0) { #if 0 printf("%s: port %d iid %u name '%s' arrived" " again\n", __func__, port->targ_port, iid, name); #endif goto take; } /* * This is an error, but what do we do about it? The * driver is telling us we have a new WWPN for this * initiator ID, so we pretty much need to use it.
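 * The stale binding is logged below and then simply overwritten * with the new name/WWPN pair at the "take" label.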
*/ printf("%s: port %d iid %u WWPN %#jx '%s' arrived," " but WWPN %#jx '%s' is still at that address\n", __func__, port->targ_port, iid, wwpn, name, (uintmax_t)port->wwpn_iid[iid].wwpn, port->wwpn_iid[iid].name); /* * XXX KDM clear have_ca and ua_pending on each LUN for * this initiator. */ } take: free(port->wwpn_iid[iid].name, M_CTL); port->wwpn_iid[iid].name = name; port->wwpn_iid[iid].wwpn = wwpn; port->wwpn_iid[iid].in_use++; mtx_unlock(&softc->ctl_lock); ctl_isc_announce_iid(port, iid); return (iid); } static int ctl_create_iid(struct ctl_port *port, int iid, uint8_t *buf) { int len; switch (port->port_type) { case CTL_PORT_FC: { struct scsi_transportid_fcp *id = (struct scsi_transportid_fcp *)buf; if (port->wwpn_iid[iid].wwpn == 0) return (0); memset(id, 0, sizeof(*id)); id->format_protocol = SCSI_PROTO_FC; scsi_u64to8b(port->wwpn_iid[iid].wwpn, id->n_port_name); return (sizeof(*id)); } case CTL_PORT_ISCSI: { struct scsi_transportid_iscsi_port *id = (struct scsi_transportid_iscsi_port *)buf; if (port->wwpn_iid[iid].name == NULL) return (0); memset(id, 0, 256); id->format_protocol = SCSI_TRN_ISCSI_FORMAT_PORT | SCSI_PROTO_ISCSI; len = strlcpy(id->iscsi_name, port->wwpn_iid[iid].name, 252) + 1; len = roundup2(min(len, 252), 4); scsi_ulto2b(len, id->additional_length); return (sizeof(*id) + len); } case CTL_PORT_SAS: { struct scsi_transportid_sas *id = (struct scsi_transportid_sas *)buf; if (port->wwpn_iid[iid].wwpn == 0) return (0); memset(id, 0, sizeof(*id)); id->format_protocol = SCSI_PROTO_SAS; scsi_u64to8b(port->wwpn_iid[iid].wwpn, id->sas_address); return (sizeof(*id)); } default: { struct scsi_transportid_spi *id = (struct scsi_transportid_spi *)buf; memset(id, 0, sizeof(*id)); id->format_protocol = SCSI_PROTO_SPI; scsi_ulto2b(iid, id->scsi_addr); scsi_ulto2b(port->targ_port, id->rel_trgt_port_id); return (sizeof(*id)); } } } /* * Serialize a command that went down the "wrong" side, and so was sent to * this controller for execution. The logic is a little different than the * standard case in ctl_scsiio_precheck(). Errors in this case need to get * sent back to the other side, but in the success case, we execute the * command on this side (XFER mode) or tell the other side to execute it * (SER_ONLY mode). */ static int ctl_serialize_other_sc_cmd(struct ctl_scsiio *ctsio) { struct ctl_softc *softc; union ctl_ha_msg msg_info; struct ctl_lun *lun; const struct ctl_cmd_entry *entry; int retval = 0; uint32_t targ_lun; softc = control_softc; targ_lun = ctsio->io_hdr.nexus.targ_mapped_lun; mtx_lock(&softc->ctl_lock); if ((targ_lun < CTL_MAX_LUNS) && ((lun = softc->ctl_luns[targ_lun]) != NULL)) { mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); /* * If the LUN is invalid, pretend that it doesn't exist. * It will go away as soon as all pending I/O has been * completed. */ if (lun->flags & CTL_LUN_DISABLED) { mtx_unlock(&lun->lun_lock); lun = NULL; } } else { mtx_unlock(&softc->ctl_lock); lun = NULL; } if (lun == NULL) { /* * The other node would not send this request to us unless * received announce that we are primary node for this LUN. * If this LUN does not exist now, it is probably result of * a race, so respond to initiator in the most opaque way. 
*/ ctl_set_busy(ctsio); ctl_copy_sense_data_back((union ctl_io *)ctsio, &msg_info); msg_info.hdr.original_sc = ctsio->io_hdr.original_sc; msg_info.hdr.serializing_sc = NULL; msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.scsi), M_WAITOK); return(1); } entry = ctl_get_cmd_entry(ctsio, NULL); if (ctl_scsiio_lun_check(lun, entry, ctsio) != 0) { mtx_unlock(&lun->lun_lock); ctl_copy_sense_data_back((union ctl_io *)ctsio, &msg_info); msg_info.hdr.original_sc = ctsio->io_hdr.original_sc; msg_info.hdr.serializing_sc = NULL; msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.scsi), M_WAITOK); return(1); } ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr = lun; ctsio->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptr = lun->be_lun; /* * Every I/O goes into the OOA queue for a * particular LUN, and stays there until completion. */ #ifdef CTL_TIME_IO if (TAILQ_EMPTY(&lun->ooa_queue)) lun->idle_time += getsbinuptime() - lun->last_busy; #endif TAILQ_INSERT_TAIL(&lun->ooa_queue, &ctsio->io_hdr, ooa_links); switch (ctl_check_ooa(lun, (union ctl_io *)ctsio, (union ctl_io *)TAILQ_PREV(&ctsio->io_hdr, ctl_ooaq, ooa_links))) { case CTL_ACTION_BLOCK: ctsio->io_hdr.flags |= CTL_FLAG_BLOCKED; TAILQ_INSERT_TAIL(&lun->blocked_queue, &ctsio->io_hdr, blocked_links); mtx_unlock(&lun->lun_lock); break; case CTL_ACTION_PASS: case CTL_ACTION_SKIP: if (softc->ha_mode == CTL_HA_MODE_XFER) { ctsio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; ctl_enqueue_rtr((union ctl_io *)ctsio); mtx_unlock(&lun->lun_lock); } else { ctsio->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE; mtx_unlock(&lun->lun_lock); /* send msg back to other side */ msg_info.hdr.original_sc = ctsio->io_hdr.original_sc; msg_info.hdr.serializing_sc = (union ctl_io *)ctsio; msg_info.hdr.msg_type = CTL_MSG_R2R; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.hdr), M_WAITOK); } break; case CTL_ACTION_OVERLAP: TAILQ_REMOVE(&lun->ooa_queue, &ctsio->io_hdr, ooa_links); mtx_unlock(&lun->lun_lock); retval = 1; ctl_set_overlapped_cmd(ctsio); ctl_copy_sense_data_back((union ctl_io *)ctsio, &msg_info); msg_info.hdr.original_sc = ctsio->io_hdr.original_sc; msg_info.hdr.serializing_sc = NULL; msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.scsi), M_WAITOK); break; case CTL_ACTION_OVERLAP_TAG: TAILQ_REMOVE(&lun->ooa_queue, &ctsio->io_hdr, ooa_links); mtx_unlock(&lun->lun_lock); retval = 1; ctl_set_overlapped_tag(ctsio, ctsio->tag_num); ctl_copy_sense_data_back((union ctl_io *)ctsio, &msg_info); msg_info.hdr.original_sc = ctsio->io_hdr.original_sc; msg_info.hdr.serializing_sc = NULL; msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.scsi), M_WAITOK); break; case CTL_ACTION_ERROR: default: TAILQ_REMOVE(&lun->ooa_queue, &ctsio->io_hdr, ooa_links); mtx_unlock(&lun->lun_lock); retval = 1; ctl_set_internal_failure(ctsio, /*sks_valid*/ 0, /*retry_count*/ 0); ctl_copy_sense_data_back((union ctl_io *)ctsio, &msg_info); msg_info.hdr.original_sc = ctsio->io_hdr.original_sc; msg_info.hdr.serializing_sc = NULL; msg_info.hdr.msg_type = CTL_MSG_BAD_JUJU; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.scsi), M_WAITOK); break; } return (retval); } /* * Returns 0 for success, errno for failure. 
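 * Note that the walk below keeps advancing *cur_fill_num even once * the caller's buffer is full; CTL_GET_OOA uses the overshoot to * compute dropped_num and report CTL_OOA_NEED_MORE_SPACE.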
*/ static int ctl_ioctl_fill_ooa(struct ctl_lun *lun, uint32_t *cur_fill_num, struct ctl_ooa *ooa_hdr, struct ctl_ooa_entry *kern_entries) { union ctl_io *io; int retval; retval = 0; mtx_lock(&lun->lun_lock); for (io = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); (io != NULL); (*cur_fill_num)++, io = (union ctl_io *)TAILQ_NEXT(&io->io_hdr, ooa_links)) { struct ctl_ooa_entry *entry; /* * If we've got more than we can fit, just count the * remaining entries. */ if (*cur_fill_num >= ooa_hdr->alloc_num) continue; entry = &kern_entries[*cur_fill_num]; entry->tag_num = io->scsiio.tag_num; entry->lun_num = lun->lun; #ifdef CTL_TIME_IO entry->start_bt = io->io_hdr.start_bt; #endif bcopy(io->scsiio.cdb, entry->cdb, io->scsiio.cdb_len); entry->cdb_len = io->scsiio.cdb_len; if (io->io_hdr.flags & CTL_FLAG_BLOCKED) entry->cmd_flags |= CTL_OOACMD_FLAG_BLOCKED; if (io->io_hdr.flags & CTL_FLAG_DMA_INPROG) entry->cmd_flags |= CTL_OOACMD_FLAG_DMA; if (io->io_hdr.flags & CTL_FLAG_ABORT) entry->cmd_flags |= CTL_OOACMD_FLAG_ABORT; if (io->io_hdr.flags & CTL_FLAG_IS_WAS_ON_RTR) entry->cmd_flags |= CTL_OOACMD_FLAG_RTR; if (io->io_hdr.flags & CTL_FLAG_DMA_QUEUED) entry->cmd_flags |= CTL_OOACMD_FLAG_DMA_QUEUED; } mtx_unlock(&lun->lun_lock); return (retval); } static void * ctl_copyin_alloc(void *user_addr, int len, char *error_str, size_t error_str_len) { void *kptr; kptr = malloc(len, M_CTL, M_WAITOK | M_ZERO); if (copyin(user_addr, kptr, len) != 0) { snprintf(error_str, error_str_len, "Error copying %d bytes " "from user address %p to kernel address %p", len, user_addr, kptr); free(kptr, M_CTL); return (NULL); } return (kptr); } static void ctl_free_args(int num_args, struct ctl_be_arg *args) { int i; if (args == NULL) return; for (i = 0; i < num_args; i++) { free(args[i].kname, M_CTL); free(args[i].kvalue, M_CTL); } free(args, M_CTL); } static struct ctl_be_arg * ctl_copyin_args(int num_args, struct ctl_be_arg *uargs, char *error_str, size_t error_str_len) { struct ctl_be_arg *args; int i; args = ctl_copyin_alloc(uargs, num_args * sizeof(*args), error_str, error_str_len); if (args == NULL) goto bailout; for (i = 0; i < num_args; i++) { args[i].kname = NULL; args[i].kvalue = NULL; } for (i = 0; i < num_args; i++) { uint8_t *tmpptr; args[i].kname = ctl_copyin_alloc(args[i].name, args[i].namelen, error_str, error_str_len); if (args[i].kname == NULL) goto bailout; if (args[i].kname[args[i].namelen - 1] != '\0') { snprintf(error_str, error_str_len, "Argument %d " "name is not NUL-terminated", i); goto bailout; } if (args[i].flags & CTL_BEARG_RD) { tmpptr = ctl_copyin_alloc(args[i].value, args[i].vallen, error_str, error_str_len); if (tmpptr == NULL) goto bailout; if ((args[i].flags & CTL_BEARG_ASCII) && (tmpptr[args[i].vallen - 1] != '\0')) { snprintf(error_str, error_str_len, "Argument " "%d value is not NUL-terminated", i); goto bailout; } args[i].kvalue = tmpptr; } else { args[i].kvalue = malloc(args[i].vallen, M_CTL, M_WAITOK | M_ZERO); } } return (args); bailout: ctl_free_args(num_args, args); return (NULL); } static void ctl_copyout_args(int num_args, struct ctl_be_arg *args) { int i; for (i = 0; i < num_args; i++) { if (args[i].flags & CTL_BEARG_WR) copyout(args[i].kvalue, args[i].value, args[i].vallen); } } /* * Escape characters that are illegal or not recommended in XML. 
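 * For example, a serial number containing "a<b&c" is emitted as * "a&lt;b&amp;c" so the generated XML stays well formed.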
*/ int ctl_sbuf_printf_esc(struct sbuf *sb, char *str, int size) { char *end = str + size; int retval; retval = 0; for (; *str && str < end; str++) { switch (*str) { case '&': retval = sbuf_printf(sb, "&amp;"); break; case '>': retval = sbuf_printf(sb, "&gt;"); break; case '<': retval = sbuf_printf(sb, "&lt;"); break; default: retval = sbuf_putc(sb, *str); break; } if (retval != 0) break; } return (retval); } static void ctl_id_sbuf(struct ctl_devid *id, struct sbuf *sb) { struct scsi_vpd_id_descriptor *desc; int i; if (id == NULL || id->len < 4) return; desc = (struct scsi_vpd_id_descriptor *)id->data; switch (desc->id_type & SVPD_ID_TYPE_MASK) { case SVPD_ID_TYPE_T10: sbuf_printf(sb, "t10."); break; case SVPD_ID_TYPE_EUI64: sbuf_printf(sb, "eui."); break; case SVPD_ID_TYPE_NAA: sbuf_printf(sb, "naa."); break; case SVPD_ID_TYPE_SCSI_NAME: break; } switch (desc->proto_codeset & SVPD_ID_CODESET_MASK) { case SVPD_ID_CODESET_BINARY: for (i = 0; i < desc->length; i++) sbuf_printf(sb, "%02x", desc->identifier[i]); break; case SVPD_ID_CODESET_ASCII: sbuf_printf(sb, "%.*s", (int)desc->length, (char *)desc->identifier); break; case SVPD_ID_CODESET_UTF8: sbuf_printf(sb, "%s", (char *)desc->identifier); break; } } static int ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct ctl_softc *softc; struct ctl_lun *lun; int retval; softc = control_softc; retval = 0; switch (cmd) { case CTL_IO: retval = ctl_ioctl_io(dev, cmd, addr, flag, td); break; case CTL_ENABLE_PORT: case CTL_DISABLE_PORT: case CTL_SET_PORT_WWNS: { struct ctl_port *port; struct ctl_port_entry *entry; entry = (struct ctl_port_entry *)addr; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { int action, done; if (port->targ_port < softc->port_min || port->targ_port >= softc->port_max) continue; action = 0; done = 0; if ((entry->port_type == CTL_PORT_NONE) && (entry->targ_port == port->targ_port)) { /* * If the user only wants to enable or * disable or set WWNs on a specific port, * do the operation and we're done. */ action = 1; done = 1; } else if (entry->port_type & port->port_type) { /* * Compare the user's type mask with the * particular frontend type to see if we * have a match. */ action = 1; done = 0; /* * Make sure the user isn't trying to set * WWNs on multiple ports at the same time. */ if (cmd == CTL_SET_PORT_WWNS) { printf("%s: Can't set WWNs on " "multiple ports\n", __func__); retval = EINVAL; break; } } if (action == 0) continue; /* * XXX KDM we have to drop the lock here, because * the online/offline operations can potentially * block. We need to reference count the frontends * so they can't go away. */ if (cmd == CTL_ENABLE_PORT) { mtx_unlock(&softc->ctl_lock); ctl_port_online(port); mtx_lock(&softc->ctl_lock); } else if (cmd == CTL_DISABLE_PORT) { mtx_unlock(&softc->ctl_lock); ctl_port_offline(port); mtx_lock(&softc->ctl_lock); } else if (cmd == CTL_SET_PORT_WWNS) { ctl_port_set_wwns(port, (entry->flags & CTL_PORT_WWNN_VALID) ? 1 : 0, entry->wwnn, (entry->flags & CTL_PORT_WWPN_VALID) ?
1 : 0, entry->wwpn); } if (done != 0) break; } mtx_unlock(&softc->ctl_lock); break; } case CTL_GET_PORT_LIST: { struct ctl_port *port; struct ctl_port_list *list; int i; list = (struct ctl_port_list *)addr; if (list->alloc_len != (list->alloc_num * sizeof(struct ctl_port_entry))) { printf("%s: CTL_GET_PORT_LIST: alloc_len %u != " "alloc_num %u * sizeof(struct ctl_port_entry) " "%zu\n", __func__, list->alloc_len, list->alloc_num, sizeof(struct ctl_port_entry)); retval = EINVAL; break; } list->fill_len = 0; list->fill_num = 0; list->dropped_num = 0; i = 0; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { struct ctl_port_entry entry, *list_entry; if (list->fill_num >= list->alloc_num) { list->dropped_num++; continue; } entry.port_type = port->port_type; strlcpy(entry.port_name, port->port_name, sizeof(entry.port_name)); entry.targ_port = port->targ_port; entry.physical_port = port->physical_port; entry.virtual_port = port->virtual_port; entry.wwnn = port->wwnn; entry.wwpn = port->wwpn; if (port->status & CTL_PORT_STATUS_ONLINE) entry.online = 1; else entry.online = 0; list_entry = &list->entries[i]; retval = copyout(&entry, list_entry, sizeof(entry)); if (retval != 0) { printf("%s: CTL_GET_PORT_LIST: copyout " "returned %d\n", __func__, retval); break; } i++; list->fill_num++; list->fill_len += sizeof(entry); } mtx_unlock(&softc->ctl_lock); /* * If this is non-zero, we had a copyout fault, so there's * probably no point in attempting to set the status inside * the structure. */ if (retval != 0) break; if (list->dropped_num > 0) list->status = CTL_PORT_LIST_NEED_MORE_SPACE; else list->status = CTL_PORT_LIST_OK; break; } case CTL_DUMP_OOA: { union ctl_io *io; char printbuf[128]; struct sbuf sb; mtx_lock(&softc->ctl_lock); printf("Dumping OOA queues:\n"); STAILQ_FOREACH(lun, &softc->lun_list, links) { mtx_lock(&lun->lun_lock); for (io = (union ctl_io *)TAILQ_FIRST( &lun->ooa_queue); io != NULL; io = (union ctl_io *)TAILQ_NEXT(&io->io_hdr, ooa_links)) { sbuf_new(&sb, printbuf, sizeof(printbuf), SBUF_FIXEDLEN); sbuf_printf(&sb, "LUN %jd tag 0x%04x%s%s%s%s: ", (intmax_t)lun->lun, io->scsiio.tag_num, (io->io_hdr.flags & CTL_FLAG_BLOCKED) ? "" : " BLOCKED", (io->io_hdr.flags & CTL_FLAG_DMA_INPROG) ? " DMA" : "", (io->io_hdr.flags & CTL_FLAG_ABORT) ? " ABORT" : "", (io->io_hdr.flags & CTL_FLAG_IS_WAS_ON_RTR) ? 
" RTR" : ""); ctl_scsi_command_string(&io->scsiio, NULL, &sb); sbuf_finish(&sb); printf("%s\n", sbuf_data(&sb)); } mtx_unlock(&lun->lun_lock); } printf("OOA queues dump done\n"); mtx_unlock(&softc->ctl_lock); break; } case CTL_GET_OOA: { struct ctl_ooa *ooa_hdr; struct ctl_ooa_entry *entries; uint32_t cur_fill_num; ooa_hdr = (struct ctl_ooa *)addr; if ((ooa_hdr->alloc_len == 0) || (ooa_hdr->alloc_num == 0)) { printf("%s: CTL_GET_OOA: alloc len %u and alloc num %u " "must be non-zero\n", __func__, ooa_hdr->alloc_len, ooa_hdr->alloc_num); retval = EINVAL; break; } if (ooa_hdr->alloc_len != (ooa_hdr->alloc_num * sizeof(struct ctl_ooa_entry))) { printf("%s: CTL_GET_OOA: alloc len %u must be alloc " "num %d * sizeof(struct ctl_ooa_entry) %zd\n", __func__, ooa_hdr->alloc_len, ooa_hdr->alloc_num,sizeof(struct ctl_ooa_entry)); retval = EINVAL; break; } entries = malloc(ooa_hdr->alloc_len, M_CTL, M_WAITOK | M_ZERO); if (entries == NULL) { printf("%s: could not allocate %d bytes for OOA " "dump\n", __func__, ooa_hdr->alloc_len); retval = ENOMEM; break; } mtx_lock(&softc->ctl_lock); if (((ooa_hdr->flags & CTL_OOA_FLAG_ALL_LUNS) == 0) && ((ooa_hdr->lun_num >= CTL_MAX_LUNS) || (softc->ctl_luns[ooa_hdr->lun_num] == NULL))) { mtx_unlock(&softc->ctl_lock); free(entries, M_CTL); printf("%s: CTL_GET_OOA: invalid LUN %ju\n", __func__, (uintmax_t)ooa_hdr->lun_num); retval = EINVAL; break; } cur_fill_num = 0; if (ooa_hdr->flags & CTL_OOA_FLAG_ALL_LUNS) { STAILQ_FOREACH(lun, &softc->lun_list, links) { retval = ctl_ioctl_fill_ooa(lun, &cur_fill_num, ooa_hdr, entries); if (retval != 0) break; } if (retval != 0) { mtx_unlock(&softc->ctl_lock); free(entries, M_CTL); break; } } else { lun = softc->ctl_luns[ooa_hdr->lun_num]; retval = ctl_ioctl_fill_ooa(lun, &cur_fill_num,ooa_hdr, entries); } mtx_unlock(&softc->ctl_lock); ooa_hdr->fill_num = min(cur_fill_num, ooa_hdr->alloc_num); ooa_hdr->fill_len = ooa_hdr->fill_num * sizeof(struct ctl_ooa_entry); retval = copyout(entries, ooa_hdr->entries, ooa_hdr->fill_len); if (retval != 0) { printf("%s: error copying out %d bytes for OOA dump\n", __func__, ooa_hdr->fill_len); } getbintime(&ooa_hdr->cur_bt); if (cur_fill_num > ooa_hdr->alloc_num) { ooa_hdr->dropped_num = cur_fill_num -ooa_hdr->alloc_num; ooa_hdr->status = CTL_OOA_NEED_MORE_SPACE; } else { ooa_hdr->dropped_num = 0; ooa_hdr->status = CTL_OOA_OK; } free(entries, M_CTL); break; } case CTL_CHECK_OOA: { union ctl_io *io; struct ctl_ooa_info *ooa_info; ooa_info = (struct ctl_ooa_info *)addr; if (ooa_info->lun_id >= CTL_MAX_LUNS) { ooa_info->status = CTL_OOA_INVALID_LUN; break; } mtx_lock(&softc->ctl_lock); lun = softc->ctl_luns[ooa_info->lun_id]; if (lun == NULL) { mtx_unlock(&softc->ctl_lock); ooa_info->status = CTL_OOA_INVALID_LUN; break; } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); ooa_info->num_entries = 0; for (io = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); io != NULL; io = (union ctl_io *)TAILQ_NEXT( &io->io_hdr, ooa_links)) { ooa_info->num_entries++; } mtx_unlock(&lun->lun_lock); ooa_info->status = CTL_OOA_SUCCESS; break; } case CTL_DELAY_IO: { struct ctl_io_delay_info *delay_info; delay_info = (struct ctl_io_delay_info *)addr; #ifdef CTL_IO_DELAY mtx_lock(&softc->ctl_lock); if ((delay_info->lun_id >= CTL_MAX_LUNS) || (softc->ctl_luns[delay_info->lun_id] == NULL)) { delay_info->status = CTL_DELAY_STATUS_INVALID_LUN; } else { lun = softc->ctl_luns[delay_info->lun_id]; mtx_lock(&lun->lun_lock); delay_info->status = CTL_DELAY_STATUS_OK; switch (delay_info->delay_type) { case CTL_DELAY_TYPE_CONT: 
break; case CTL_DELAY_TYPE_ONESHOT: break; default: delay_info->status = CTL_DELAY_STATUS_INVALID_TYPE; break; } switch (delay_info->delay_loc) { case CTL_DELAY_LOC_DATAMOVE: lun->delay_info.datamove_type = delay_info->delay_type; lun->delay_info.datamove_delay = delay_info->delay_secs; break; case CTL_DELAY_LOC_DONE: lun->delay_info.done_type = delay_info->delay_type; lun->delay_info.done_delay = delay_info->delay_secs; break; default: delay_info->status = CTL_DELAY_STATUS_INVALID_LOC; break; } mtx_unlock(&lun->lun_lock); } mtx_unlock(&softc->ctl_lock); #else delay_info->status = CTL_DELAY_STATUS_NOT_IMPLEMENTED; #endif /* CTL_IO_DELAY */ break; } case CTL_REALSYNC_SET: { int *syncstate; syncstate = (int *)addr; mtx_lock(&softc->ctl_lock); switch (*syncstate) { case 0: softc->flags &= ~CTL_FLAG_REAL_SYNC; break; case 1: softc->flags |= CTL_FLAG_REAL_SYNC; break; default: retval = EINVAL; break; } mtx_unlock(&softc->ctl_lock); break; } case CTL_REALSYNC_GET: { int *syncstate; syncstate = (int*)addr; mtx_lock(&softc->ctl_lock); if (softc->flags & CTL_FLAG_REAL_SYNC) *syncstate = 1; else *syncstate = 0; mtx_unlock(&softc->ctl_lock); break; } case CTL_SETSYNC: case CTL_GETSYNC: { struct ctl_sync_info *sync_info; sync_info = (struct ctl_sync_info *)addr; mtx_lock(&softc->ctl_lock); lun = softc->ctl_luns[sync_info->lun_id]; if (lun == NULL) { mtx_unlock(&softc->ctl_lock); sync_info->status = CTL_GS_SYNC_NO_LUN; break; } /* * Get or set the sync interval. We're not bounds checking * in the set case, hopefully the user won't do something * silly. */ mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); if (cmd == CTL_GETSYNC) sync_info->sync_interval = lun->sync_interval; else lun->sync_interval = sync_info->sync_interval; mtx_unlock(&lun->lun_lock); sync_info->status = CTL_GS_SYNC_OK; break; } case CTL_GETSTATS: { struct ctl_stats *stats; int i; stats = (struct ctl_stats *)addr; if ((sizeof(struct ctl_lun_io_stats) * softc->num_luns) > stats->alloc_len) { stats->status = CTL_SS_NEED_MORE_SPACE; stats->num_luns = softc->num_luns; break; } /* * XXX KDM no locking here. If the LUN list changes, * things can blow up. */ for (i = 0, lun = STAILQ_FIRST(&softc->lun_list); lun != NULL; i++, lun = STAILQ_NEXT(lun, links)) { retval = copyout(&lun->stats, &stats->lun_stats[i], sizeof(lun->stats)); if (retval != 0) break; } stats->num_luns = softc->num_luns; stats->fill_len = sizeof(struct ctl_lun_io_stats) * softc->num_luns; stats->status = CTL_SS_OK; #ifdef CTL_TIME_IO stats->flags = CTL_STATS_FLAG_TIME_VALID; #else stats->flags = CTL_STATS_FLAG_NONE; #endif getnanouptime(&stats->timestamp); break; } case CTL_ERROR_INJECT: { struct ctl_error_desc *err_desc, *new_err_desc; err_desc = (struct ctl_error_desc *)addr; new_err_desc = malloc(sizeof(*new_err_desc), M_CTL, M_WAITOK | M_ZERO); bcopy(err_desc, new_err_desc, sizeof(*new_err_desc)); mtx_lock(&softc->ctl_lock); lun = softc->ctl_luns[err_desc->lun_id]; if (lun == NULL) { mtx_unlock(&softc->ctl_lock); free(new_err_desc, M_CTL); printf("%s: CTL_ERROR_INJECT: invalid LUN %ju\n", __func__, (uintmax_t)err_desc->lun_id); retval = EINVAL; break; } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); /* * We could do some checking here to verify the validity * of the request, but given the complexity of error * injection requests, the checking logic would be fairly * complex. * * For now, if the request is invalid, it just won't get * executed and might get deleted. 
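 * Each descriptor is queued on the LUN's error_list below and * stamped with a per-LUN serial number that is returned to the * caller; CTL_ERROR_INJECT_DELETE later removes it by matching * that serial.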
*/ STAILQ_INSERT_TAIL(&lun->error_list, new_err_desc, links); /* * XXX KDM check to make sure the serial number is unique, * in case we somehow manage to wrap. That shouldn't * happen for a very long time, but it's the right thing to * do. */ new_err_desc->serial = lun->error_serial; err_desc->serial = lun->error_serial; lun->error_serial++; mtx_unlock(&lun->lun_lock); break; } case CTL_ERROR_INJECT_DELETE: { struct ctl_error_desc *delete_desc, *desc, *desc2; int delete_done; delete_desc = (struct ctl_error_desc *)addr; delete_done = 0; mtx_lock(&softc->ctl_lock); lun = softc->ctl_luns[delete_desc->lun_id]; if (lun == NULL) { mtx_unlock(&softc->ctl_lock); printf("%s: CTL_ERROR_INJECT_DELETE: invalid LUN %ju\n", __func__, (uintmax_t)delete_desc->lun_id); retval = EINVAL; break; } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); STAILQ_FOREACH_SAFE(desc, &lun->error_list, links, desc2) { if (desc->serial != delete_desc->serial) continue; STAILQ_REMOVE(&lun->error_list, desc, ctl_error_desc, links); free(desc, M_CTL); delete_done = 1; } mtx_unlock(&lun->lun_lock); if (delete_done == 0) { printf("%s: CTL_ERROR_INJECT_DELETE: can't find " "error serial %ju on LUN %u\n", __func__, delete_desc->serial, delete_desc->lun_id); retval = EINVAL; break; } break; } case CTL_DUMP_STRUCTS: { int i, j, k; struct ctl_port *port; struct ctl_frontend *fe; mtx_lock(&softc->ctl_lock); printf("CTL Persistent Reservation information start:\n"); for (i = 0; i < CTL_MAX_LUNS; i++) { lun = softc->ctl_luns[i]; if ((lun == NULL) || ((lun->flags & CTL_LUN_DISABLED) != 0)) continue; for (j = 0; j < CTL_MAX_PORTS; j++) { if (lun->pr_keys[j] == NULL) continue; for (k = 0; k < CTL_MAX_INIT_PER_PORT; k++){ if (lun->pr_keys[j][k] == 0) continue; printf(" LUN %d port %d iid %d key " "%#jx\n", i, j, k, (uintmax_t)lun->pr_keys[j][k]); } } } printf("CTL Persistent Reservation information end\n"); printf("CTL Ports:\n"); STAILQ_FOREACH(port, &softc->port_list, links) { printf(" Port %d '%s' Frontend '%s' Type %u pp %d vp %d WWNN " "%#jx WWPN %#jx\n", port->targ_port, port->port_name, port->frontend->name, port->port_type, port->physical_port, port->virtual_port, (uintmax_t)port->wwnn, (uintmax_t)port->wwpn); for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) { if (port->wwpn_iid[j].in_use == 0 && port->wwpn_iid[j].wwpn == 0 && port->wwpn_iid[j].name == NULL) continue; printf(" iid %u use %d WWPN %#jx '%s'\n", j, port->wwpn_iid[j].in_use, (uintmax_t)port->wwpn_iid[j].wwpn, port->wwpn_iid[j].name); } } printf("CTL Port information end\n"); mtx_unlock(&softc->ctl_lock); /* * XXX KDM calling this without a lock. We'd likely want * to drop the lock before calling the frontend's dump * routine anyway. 
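 * fe_dump is optional, so the loop below only calls it for * frontends that registered one.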
*/ printf("CTL Frontends:\n"); STAILQ_FOREACH(fe, &softc->fe_list, links) { printf(" Frontend '%s'\n", fe->name); if (fe->fe_dump != NULL) fe->fe_dump(); } printf("CTL Frontend information end\n"); break; } case CTL_LUN_REQ: { struct ctl_lun_req *lun_req; struct ctl_backend_driver *backend; lun_req = (struct ctl_lun_req *)addr; backend = ctl_backend_find(lun_req->backend); if (backend == NULL) { lun_req->status = CTL_LUN_ERROR; snprintf(lun_req->error_str, sizeof(lun_req->error_str), "Backend \"%s\" not found.", lun_req->backend); break; } if (lun_req->num_be_args > 0) { lun_req->kern_be_args = ctl_copyin_args( lun_req->num_be_args, lun_req->be_args, lun_req->error_str, sizeof(lun_req->error_str)); if (lun_req->kern_be_args == NULL) { lun_req->status = CTL_LUN_ERROR; break; } } retval = backend->ioctl(dev, cmd, addr, flag, td); if (lun_req->num_be_args > 0) { ctl_copyout_args(lun_req->num_be_args, lun_req->kern_be_args); ctl_free_args(lun_req->num_be_args, lun_req->kern_be_args); } break; } case CTL_LUN_LIST: { struct sbuf *sb; struct ctl_lun_list *list; struct ctl_option *opt; list = (struct ctl_lun_list *)addr; /* * Allocate a fixed length sbuf here, based on the length * of the user's buffer. We could allocate an auto-extending * buffer, and then tell the user how much larger our * amount of data is than his buffer, but that presents * some problems: * * 1. The sbuf(9) routines use a blocking malloc, and so * we can't hold a lock while calling them with an * auto-extending buffer. * * 2. There is not currently a LUN reference counting * mechanism, outside of outstanding transactions on * the LUN's OOA queue. So a LUN could go away on us * while we're getting the LUN number, backend-specific * information, etc. Thus, given the way things * currently work, we need to hold the CTL lock while * grabbing LUN information. * * So, from the user's standpoint, the best thing to do is * allocate what he thinks is a reasonable buffer length, * and then if he gets a CTL_LUN_LIST_NEED_MORE_SPACE error, * double the buffer length and try again. (And repeat * that until he succeeds.) */ sb = sbuf_new(NULL, NULL, list->alloc_len, SBUF_FIXEDLEN); if (sb == NULL) { list->status = CTL_LUN_LIST_ERROR; snprintf(list->error_str, sizeof(list->error_str), "Unable to allocate %d bytes for LUN list", list->alloc_len); break; } sbuf_printf(sb, "\n"); mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(lun, &softc->lun_list, links) { mtx_lock(&lun->lun_lock); retval = sbuf_printf(sb, "\n", (uintmax_t)lun->lun); /* * Bail out as soon as we see that we've overfilled * the buffer. */ if (retval != 0) break; retval = sbuf_printf(sb, "\t%s" "\n", (lun->backend == NULL) ? "none" : lun->backend->name); if (retval != 0) break; retval = sbuf_printf(sb, "\t%d\n", lun->be_lun->lun_type); if (retval != 0) break; if (lun->backend == NULL) { retval = sbuf_printf(sb, "\n"); if (retval != 0) break; continue; } retval = sbuf_printf(sb, "\t%ju\n", (lun->be_lun->maxlba > 0) ? 
lun->be_lun->maxlba + 1 : 0); if (retval != 0) break; retval = sbuf_printf(sb, "\t%u\n", lun->be_lun->blocksize); if (retval != 0) break; retval = sbuf_printf(sb, "\t"); if (retval != 0) break; retval = ctl_sbuf_printf_esc(sb, lun->be_lun->serial_num, sizeof(lun->be_lun->serial_num)); if (retval != 0) break; retval = sbuf_printf(sb, "\n"); if (retval != 0) break; retval = sbuf_printf(sb, "\t"); if (retval != 0) break; retval = ctl_sbuf_printf_esc(sb, lun->be_lun->device_id, sizeof(lun->be_lun->device_id)); if (retval != 0) break; retval = sbuf_printf(sb, "\n"); if (retval != 0) break; if (lun->backend->lun_info != NULL) { retval = lun->backend->lun_info(lun->be_lun->be_lun, sb); if (retval != 0) break; } STAILQ_FOREACH(opt, &lun->be_lun->options, links) { retval = sbuf_printf(sb, "\t<%s>%s\n", opt->name, opt->value, opt->name); if (retval != 0) break; } retval = sbuf_printf(sb, "\n"); if (retval != 0) break; mtx_unlock(&lun->lun_lock); } if (lun != NULL) mtx_unlock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); if ((retval != 0) || ((retval = sbuf_printf(sb, "\n")) != 0)) { retval = 0; sbuf_delete(sb); list->status = CTL_LUN_LIST_NEED_MORE_SPACE; snprintf(list->error_str, sizeof(list->error_str), "Out of space, %d bytes is too small", list->alloc_len); break; } sbuf_finish(sb); retval = copyout(sbuf_data(sb), list->lun_xml, sbuf_len(sb) + 1); list->fill_len = sbuf_len(sb) + 1; list->status = CTL_LUN_LIST_OK; sbuf_delete(sb); break; } case CTL_ISCSI: { struct ctl_iscsi *ci; struct ctl_frontend *fe; ci = (struct ctl_iscsi *)addr; fe = ctl_frontend_find("iscsi"); if (fe == NULL) { ci->status = CTL_ISCSI_ERROR; snprintf(ci->error_str, sizeof(ci->error_str), "Frontend \"iscsi\" not found."); break; } retval = fe->ioctl(dev, cmd, addr, flag, td); break; } case CTL_PORT_REQ: { struct ctl_req *req; struct ctl_frontend *fe; req = (struct ctl_req *)addr; fe = ctl_frontend_find(req->driver); if (fe == NULL) { req->status = CTL_LUN_ERROR; snprintf(req->error_str, sizeof(req->error_str), "Frontend \"%s\" not found.", req->driver); break; } if (req->num_args > 0) { req->kern_args = ctl_copyin_args(req->num_args, req->args, req->error_str, sizeof(req->error_str)); if (req->kern_args == NULL) { req->status = CTL_LUN_ERROR; break; } } if (fe->ioctl) retval = fe->ioctl(dev, cmd, addr, flag, td); else retval = ENODEV; if (req->num_args > 0) { ctl_copyout_args(req->num_args, req->kern_args); ctl_free_args(req->num_args, req->kern_args); } break; } case CTL_PORT_LIST: { struct sbuf *sb; struct ctl_port *port; struct ctl_lun_list *list; struct ctl_option *opt; int j; uint32_t plun; list = (struct ctl_lun_list *)addr; sb = sbuf_new(NULL, NULL, list->alloc_len, SBUF_FIXEDLEN); if (sb == NULL) { list->status = CTL_LUN_LIST_ERROR; snprintf(list->error_str, sizeof(list->error_str), "Unable to allocate %d bytes for LUN list", list->alloc_len); break; } sbuf_printf(sb, "\n"); mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { retval = sbuf_printf(sb, "\n", (uintmax_t)port->targ_port); /* * Bail out as soon as we see that we've overfilled * the buffer. */ if (retval != 0) break; retval = sbuf_printf(sb, "\t%s" "\n", port->frontend->name); if (retval != 0) break; retval = sbuf_printf(sb, "\t%d\n", port->port_type); if (retval != 0) break; retval = sbuf_printf(sb, "\t%s\n", (port->status & CTL_PORT_STATUS_ONLINE) ? 
"YES" : "NO"); if (retval != 0) break; retval = sbuf_printf(sb, "\t%s\n", port->port_name); if (retval != 0) break; retval = sbuf_printf(sb, "\t%d\n", port->physical_port); if (retval != 0) break; retval = sbuf_printf(sb, "\t%d\n", port->virtual_port); if (retval != 0) break; if (port->target_devid != NULL) { sbuf_printf(sb, "\t"); ctl_id_sbuf(port->target_devid, sb); sbuf_printf(sb, "\n"); } if (port->port_devid != NULL) { sbuf_printf(sb, "\t"); ctl_id_sbuf(port->port_devid, sb); sbuf_printf(sb, "\n"); } if (port->port_info != NULL) { retval = port->port_info(port->onoff_arg, sb); if (retval != 0) break; } STAILQ_FOREACH(opt, &port->options, links) { retval = sbuf_printf(sb, "\t<%s>%s\n", opt->name, opt->value, opt->name); if (retval != 0) break; } if (port->lun_map != NULL) { sbuf_printf(sb, "\ton\n"); for (j = 0; j < CTL_MAX_LUNS; j++) { plun = ctl_lun_map_from_port(port, j); if (plun >= CTL_MAX_LUNS) continue; sbuf_printf(sb, "\t%u\n", j, plun); } } for (j = 0; j < CTL_MAX_INIT_PER_PORT; j++) { if (port->wwpn_iid[j].in_use == 0 || (port->wwpn_iid[j].wwpn == 0 && port->wwpn_iid[j].name == NULL)) continue; if (port->wwpn_iid[j].name != NULL) retval = sbuf_printf(sb, "\t%s\n", j, port->wwpn_iid[j].name); else retval = sbuf_printf(sb, "\tnaa.%08jx\n", j, port->wwpn_iid[j].wwpn); if (retval != 0) break; } if (retval != 0) break; retval = sbuf_printf(sb, "\n"); if (retval != 0) break; } mtx_unlock(&softc->ctl_lock); if ((retval != 0) || ((retval = sbuf_printf(sb, "\n")) != 0)) { retval = 0; sbuf_delete(sb); list->status = CTL_LUN_LIST_NEED_MORE_SPACE; snprintf(list->error_str, sizeof(list->error_str), "Out of space, %d bytes is too small", list->alloc_len); break; } sbuf_finish(sb); retval = copyout(sbuf_data(sb), list->lun_xml, sbuf_len(sb) + 1); list->fill_len = sbuf_len(sb) + 1; list->status = CTL_LUN_LIST_OK; sbuf_delete(sb); break; } case CTL_LUN_MAP: { struct ctl_lun_map *lm = (struct ctl_lun_map *)addr; struct ctl_port *port; mtx_lock(&softc->ctl_lock); if (lm->port < softc->port_min || lm->port >= softc->port_max || (port = softc->ctl_ports[lm->port]) == NULL) { mtx_unlock(&softc->ctl_lock); return (ENXIO); } if (port->status & CTL_PORT_STATUS_ONLINE) { STAILQ_FOREACH(lun, &softc->lun_list, links) { if (ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS) continue; mtx_lock(&lun->lun_lock); ctl_est_ua_port(lun, lm->port, -1, CTL_UA_LUN_CHANGE); mtx_unlock(&lun->lun_lock); } } mtx_unlock(&softc->ctl_lock); // XXX: port_enable sleeps if (lm->plun < CTL_MAX_LUNS) { if (lm->lun == UINT32_MAX) retval = ctl_lun_map_unset(port, lm->plun); else if (lm->lun < CTL_MAX_LUNS && softc->ctl_luns[lm->lun] != NULL) retval = ctl_lun_map_set(port, lm->plun, lm->lun); else return (ENXIO); } else if (lm->plun == UINT32_MAX) { if (lm->lun == UINT32_MAX) retval = ctl_lun_map_deinit(port); else retval = ctl_lun_map_init(port); } else return (ENXIO); if (port->status & CTL_PORT_STATUS_ONLINE) ctl_isc_announce_port(port); break; } default: { /* XXX KDM should we fix this? */ #if 0 struct ctl_backend_driver *backend; unsigned int type; int found; found = 0; /* * We encode the backend type as the ioctl type for backend * ioctls. So parse it out here, and then search for a * backend of this type. 
*/ type = _IOC_TYPE(cmd); STAILQ_FOREACH(backend, &softc->be_list, links) { if (backend->type == type) { found = 1; break; } } if (found == 0) { printf("ctl: unknown ioctl command %#lx or backend " "%d\n", cmd, type); retval = EINVAL; break; } retval = backend->ioctl(dev, cmd, addr, flag, td); #endif retval = ENOTTY; break; } } return (retval); } uint32_t ctl_get_initindex(struct ctl_nexus *nexus) { return (nexus->initid + (nexus->targ_port * CTL_MAX_INIT_PER_PORT)); } int ctl_lun_map_init(struct ctl_port *port) { struct ctl_softc *softc = control_softc; struct ctl_lun *lun; uint32_t i; if (port->lun_map == NULL) port->lun_map = malloc(sizeof(uint32_t) * CTL_MAX_LUNS, M_CTL, M_NOWAIT); if (port->lun_map == NULL) return (ENOMEM); for (i = 0; i < CTL_MAX_LUNS; i++) port->lun_map[i] = UINT32_MAX; if (port->status & CTL_PORT_STATUS_ONLINE) { if (port->lun_disable != NULL) { STAILQ_FOREACH(lun, &softc->lun_list, links) port->lun_disable(port->targ_lun_arg, lun->lun); } ctl_isc_announce_port(port); } return (0); } int ctl_lun_map_deinit(struct ctl_port *port) { struct ctl_softc *softc = control_softc; struct ctl_lun *lun; if (port->lun_map == NULL) return (0); free(port->lun_map, M_CTL); port->lun_map = NULL; if (port->status & CTL_PORT_STATUS_ONLINE) { if (port->lun_enable != NULL) { STAILQ_FOREACH(lun, &softc->lun_list, links) port->lun_enable(port->targ_lun_arg, lun->lun); } ctl_isc_announce_port(port); } return (0); } int ctl_lun_map_set(struct ctl_port *port, uint32_t plun, uint32_t glun) { int status; uint32_t old; if (port->lun_map == NULL) { status = ctl_lun_map_init(port); if (status != 0) return (status); } old = port->lun_map[plun]; port->lun_map[plun] = glun; if ((port->status & CTL_PORT_STATUS_ONLINE) && old >= CTL_MAX_LUNS) { if (port->lun_enable != NULL) port->lun_enable(port->targ_lun_arg, plun); ctl_isc_announce_port(port); } return (0); } int ctl_lun_map_unset(struct ctl_port *port, uint32_t plun) { uint32_t old; if (port->lun_map == NULL) return (0); old = port->lun_map[plun]; port->lun_map[plun] = UINT32_MAX; if ((port->status & CTL_PORT_STATUS_ONLINE) && old < CTL_MAX_LUNS) { if (port->lun_disable != NULL) port->lun_disable(port->targ_lun_arg, plun); ctl_isc_announce_port(port); } return (0); } uint32_t ctl_lun_map_from_port(struct ctl_port *port, uint32_t lun_id) { if (port == NULL) return (UINT32_MAX); if (port->lun_map == NULL || lun_id >= CTL_MAX_LUNS) return (lun_id); return (port->lun_map[lun_id]); } uint32_t ctl_lun_map_to_port(struct ctl_port *port, uint32_t lun_id) { uint32_t i; if (port == NULL) return (UINT32_MAX); if (port->lun_map == NULL) return (lun_id); for (i = 0; i < CTL_MAX_LUNS; i++) { if (port->lun_map[i] == lun_id) return (i); } return (UINT32_MAX); } static struct ctl_port * ctl_io_port(struct ctl_io_hdr *io_hdr) { return (control_softc->ctl_ports[io_hdr->nexus.targ_port]); } int ctl_ffz(uint32_t *mask, uint32_t first, uint32_t last) { int i; for (i = first; i < last; i++) { if ((mask[i / 32] & (1 << (i % 32))) == 0) return (i); } return (-1); } int ctl_set_mask(uint32_t *mask, uint32_t bit) { uint32_t chunk, piece; chunk = bit >> 5; piece = bit % (sizeof(uint32_t) * 8); if ((mask[chunk] & (1 << piece)) != 0) return (-1); else mask[chunk] |= (1 << piece); return (0); } int ctl_clear_mask(uint32_t *mask, uint32_t bit) { uint32_t chunk, piece; chunk = bit >> 5; piece = bit % (sizeof(uint32_t) * 8); if ((mask[chunk] & (1 << piece)) == 0) return (-1); else mask[chunk] &= ~(1 << piece); return (0); } int ctl_is_set(uint32_t *mask, uint32_t bit) { uint32_t 
chunk, piece; chunk = bit >> 5; piece = bit % (sizeof(uint32_t) * 8); if ((mask[chunk] & (1 << piece)) == 0) return (0); else return (1); } static uint64_t ctl_get_prkey(struct ctl_lun *lun, uint32_t residx) { uint64_t *t; t = lun->pr_keys[residx/CTL_MAX_INIT_PER_PORT]; if (t == NULL) return (0); return (t[residx % CTL_MAX_INIT_PER_PORT]); } static void ctl_clr_prkey(struct ctl_lun *lun, uint32_t residx) { uint64_t *t; t = lun->pr_keys[residx/CTL_MAX_INIT_PER_PORT]; if (t == NULL) return; t[residx % CTL_MAX_INIT_PER_PORT] = 0; } static void ctl_alloc_prkey(struct ctl_lun *lun, uint32_t residx) { uint64_t *p; u_int i; i = residx/CTL_MAX_INIT_PER_PORT; if (lun->pr_keys[i] != NULL) return; mtx_unlock(&lun->lun_lock); p = malloc(sizeof(uint64_t) * CTL_MAX_INIT_PER_PORT, M_CTL, M_WAITOK | M_ZERO); mtx_lock(&lun->lun_lock); if (lun->pr_keys[i] == NULL) lun->pr_keys[i] = p; else free(p, M_CTL); } static void ctl_set_prkey(struct ctl_lun *lun, uint32_t residx, uint64_t key) { uint64_t *t; t = lun->pr_keys[residx/CTL_MAX_INIT_PER_PORT]; KASSERT(t != NULL, ("prkey %d is not allocated", residx)); t[residx % CTL_MAX_INIT_PER_PORT] = key; } /* * ctl_softc, pool_name, total_ctl_io are passed in. * npool is passed out. */ int ctl_pool_create(struct ctl_softc *ctl_softc, const char *pool_name, uint32_t total_ctl_io, void **npool) { #ifdef IO_POOLS struct ctl_io_pool *pool; pool = (struct ctl_io_pool *)malloc(sizeof(*pool), M_CTL, M_NOWAIT | M_ZERO); if (pool == NULL) return (ENOMEM); snprintf(pool->name, sizeof(pool->name), "CTL IO %s", pool_name); pool->ctl_softc = ctl_softc; pool->zone = uma_zsecond_create(pool->name, NULL, NULL, NULL, NULL, ctl_softc->io_zone); /* uma_prealloc(pool->zone, total_ctl_io); */ *npool = pool; #else *npool = ctl_softc->io_zone; #endif return (0); } void ctl_pool_free(struct ctl_io_pool *pool) { if (pool == NULL) return; #ifdef IO_POOLS uma_zdestroy(pool->zone); free(pool, M_CTL); #endif } union ctl_io * ctl_alloc_io(void *pool_ref) { union ctl_io *io; #ifdef IO_POOLS struct ctl_io_pool *pool = (struct ctl_io_pool *)pool_ref; io = uma_zalloc(pool->zone, M_WAITOK); #else io = uma_zalloc((uma_zone_t)pool_ref, M_WAITOK); #endif if (io != NULL) io->io_hdr.pool = pool_ref; return (io); } union ctl_io * ctl_alloc_io_nowait(void *pool_ref) { union ctl_io *io; #ifdef IO_POOLS struct ctl_io_pool *pool = (struct ctl_io_pool *)pool_ref; io = uma_zalloc(pool->zone, M_NOWAIT); #else io = uma_zalloc((uma_zone_t)pool_ref, M_NOWAIT); #endif if (io != NULL) io->io_hdr.pool = pool_ref; return (io); } void ctl_free_io(union ctl_io *io) { #ifdef IO_POOLS struct ctl_io_pool *pool; #endif if (io == NULL) return; #ifdef IO_POOLS pool = (struct ctl_io_pool *)io->io_hdr.pool; uma_zfree(pool->zone, io); #else uma_zfree((uma_zone_t)io->io_hdr.pool, io); #endif } void ctl_zero_io(union ctl_io *io) { void *pool_ref; if (io == NULL) return; /* * May need to preserve linked list pointers at some point too. */ pool_ref = io->io_hdr.pool; memset(io, 0, sizeof(*io)); io->io_hdr.pool = pool_ref; } /* * This routine is currently used for internal copies of ctl_ios that need * to persist for some reason after we've already returned status to the * FETD. (Thus the flag set.) * * XXX XXX * Note that this makes a blind copy of all fields in the ctl_io, except * for the pool reference. This includes any memory that has been * allocated! That memory will no longer be valid after done has been * called, so this would be VERY DANGEROUS for command that actually does * any reads or writes. 
Right now (11/7/2005), this is only used for immediate * start and stop commands, which don't transfer any data, so this is not a * problem. If it is used for anything else, the caller would also need to * allocate data buffer space and this routine would need to be modified to * copy the data buffer(s) as well. */ void ctl_copy_io(union ctl_io *src, union ctl_io *dest) { void *pool_ref; if ((src == NULL) || (dest == NULL)) return; /* * May need to preserve linked list pointers at some point too. */ pool_ref = dest->io_hdr.pool; memcpy(dest, src, MIN(sizeof(*src), sizeof(*dest))); dest->io_hdr.pool = pool_ref; /* * We need to know that this is an internal copy, and doesn't need * to get passed back to the FETD that allocated it. */ dest->io_hdr.flags |= CTL_FLAG_INT_COPY; } int ctl_expand_number(const char *buf, uint64_t *num) { char *endptr; uint64_t number; unsigned shift; number = strtoq(buf, &endptr, 0); switch (tolower((unsigned char)*endptr)) { case 'e': shift = 60; break; case 'p': shift = 50; break; case 't': shift = 40; break; case 'g': shift = 30; break; case 'm': shift = 20; break; case 'k': shift = 10; break; case 'b': case '\0': /* No unit. */ *num = number; return (0); default: /* Unrecognized unit. */ return (-1); } if ((number << shift) >> shift != number) { /* Overflow */ return (-1); } *num = number << shift; return (0); } /* * This routine could be used in the future to load default and/or saved * mode page parameters for a particular lun. */ static int ctl_init_page_index(struct ctl_lun *lun) { int i; struct ctl_page_index *page_index; const char *value; uint64_t ival; memcpy(&lun->mode_pages.index, page_index_template, sizeof(page_index_template)); for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { page_index = &lun->mode_pages.index[i]; /* * If this is a disk-only mode page, there's no point in * setting it up. For some pages, we have to have some * basic information about the disk in order to calculate the * mode page data. */ if ((lun->be_lun->lun_type != T_DIRECT) && (page_index->page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; switch (page_index->page_code & SMPH_PC_MASK) { case SMS_RW_ERROR_RECOVERY_PAGE: { if (page_index->subpage != SMS_SUBPAGE_PAGE_0) panic("subpage is incorrect!"); memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_CURRENT], &rw_er_page_default, sizeof(rw_er_page_default)); memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_CHANGEABLE], &rw_er_page_changeable, sizeof(rw_er_page_changeable)); memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_DEFAULT], &rw_er_page_default, sizeof(rw_er_page_default)); memcpy(&lun->mode_pages.rw_er_page[CTL_PAGE_SAVED], &rw_er_page_default, sizeof(rw_er_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.rw_er_page; break; } case SMS_FORMAT_DEVICE_PAGE: { struct scsi_format_page *format_page; if (page_index->subpage != SMS_SUBPAGE_PAGE_0) panic("subpage is incorrect!"); /* * Sectors per track are set above. Bytes per * sector need to be set here on a per-LUN basis.
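 * For example (illustrative values), a LUN with a 4096-byte * blocksize has 0x1000 stored via scsi_ulto2b() into * bytes_per_sector in the CURRENT, DEFAULT and SAVED copies below.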
*/ memcpy(&lun->mode_pages.format_page[CTL_PAGE_CURRENT], &format_page_default, sizeof(format_page_default)); memcpy(&lun->mode_pages.format_page[ CTL_PAGE_CHANGEABLE], &format_page_changeable, sizeof(format_page_changeable)); memcpy(&lun->mode_pages.format_page[CTL_PAGE_DEFAULT], &format_page_default, sizeof(format_page_default)); memcpy(&lun->mode_pages.format_page[CTL_PAGE_SAVED], &format_page_default, sizeof(format_page_default)); format_page = &lun->mode_pages.format_page[ CTL_PAGE_CURRENT]; scsi_ulto2b(lun->be_lun->blocksize, format_page->bytes_per_sector); format_page = &lun->mode_pages.format_page[ CTL_PAGE_DEFAULT]; scsi_ulto2b(lun->be_lun->blocksize, format_page->bytes_per_sector); format_page = &lun->mode_pages.format_page[ CTL_PAGE_SAVED]; scsi_ulto2b(lun->be_lun->blocksize, format_page->bytes_per_sector); page_index->page_data = (uint8_t *)lun->mode_pages.format_page; break; } case SMS_RIGID_DISK_PAGE: { struct scsi_rigid_disk_page *rigid_disk_page; uint32_t sectors_per_cylinder; uint64_t cylinders; #ifndef __XSCALE__ int shift; #endif /* !__XSCALE__ */ if (page_index->subpage != SMS_SUBPAGE_PAGE_0) panic("invalid subpage value %d", page_index->subpage); /* * Rotation rate and sectors per track are set * above. We calculate the cylinders here based on * capacity. Due to the number of heads and * sectors per track we're using, smaller arrays * may turn out to have 0 cylinders. Linux and * FreeBSD don't pay attention to these mode pages * to figure out capacity, but Solaris does. It * seems to deal with 0 cylinders just fine, and * works out a fake geometry based on the capacity. */ memcpy(&lun->mode_pages.rigid_disk_page[ CTL_PAGE_DEFAULT], &rigid_disk_page_default, sizeof(rigid_disk_page_default)); memcpy(&lun->mode_pages.rigid_disk_page[ CTL_PAGE_CHANGEABLE],&rigid_disk_page_changeable, sizeof(rigid_disk_page_changeable)); sectors_per_cylinder = CTL_DEFAULT_SECTORS_PER_TRACK * CTL_DEFAULT_HEADS; /* * The divide method here will be more accurate, * probably, but results in floating point being * used in the kernel on i386 (__udivdi3()). On the * XScale, though, __udivdi3() is implemented in * software. * * The shift method for cylinder calculation is * accurate if sectors_per_cylinder is a power of * 2. Otherwise it might be slightly off -- you * might have a bit of a truncation problem. */ #ifdef __XSCALE__ cylinders = (lun->be_lun->maxlba + 1) / sectors_per_cylinder; #else for (shift = 31; shift > 0; shift--) { if (sectors_per_cylinder & (1 << shift)) break; } cylinders = (lun->be_lun->maxlba + 1) >> shift; #endif /* * We've basically got 3 bytes, or 24 bits for the * cylinder size in the mode page. If we're over, * just round down to 2^24. 
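 * (Strictly, the clamp below saturates at 0xffffff, i.e. 2^24 - 1, * the largest value the 3-byte field can hold.)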
*/ if (cylinders > 0xffffff) cylinders = 0xffffff; rigid_disk_page = &lun->mode_pages.rigid_disk_page[ CTL_PAGE_DEFAULT]; scsi_ulto3b(cylinders, rigid_disk_page->cylinders); if ((value = ctl_get_opt(&lun->be_lun->options, "rpm")) != NULL) { scsi_ulto2b(strtol(value, NULL, 0), rigid_disk_page->rotation_rate); } memcpy(&lun->mode_pages.rigid_disk_page[CTL_PAGE_CURRENT], &lun->mode_pages.rigid_disk_page[CTL_PAGE_DEFAULT], sizeof(rigid_disk_page_default)); memcpy(&lun->mode_pages.rigid_disk_page[CTL_PAGE_SAVED], &lun->mode_pages.rigid_disk_page[CTL_PAGE_DEFAULT], sizeof(rigid_disk_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.rigid_disk_page; break; } case SMS_CACHING_PAGE: { struct scsi_caching_page *caching_page; if (page_index->subpage != SMS_SUBPAGE_PAGE_0) panic("invalid subpage value %d", page_index->subpage); memcpy(&lun->mode_pages.caching_page[CTL_PAGE_DEFAULT], &caching_page_default, sizeof(caching_page_default)); memcpy(&lun->mode_pages.caching_page[ CTL_PAGE_CHANGEABLE], &caching_page_changeable, sizeof(caching_page_changeable)); memcpy(&lun->mode_pages.caching_page[CTL_PAGE_SAVED], &caching_page_default, sizeof(caching_page_default)); caching_page = &lun->mode_pages.caching_page[ CTL_PAGE_SAVED]; value = ctl_get_opt(&lun->be_lun->options, "writecache"); if (value != NULL && strcmp(value, "off") == 0) caching_page->flags1 &= ~SCP_WCE; value = ctl_get_opt(&lun->be_lun->options, "readcache"); if (value != NULL && strcmp(value, "off") == 0) caching_page->flags1 |= SCP_RCD; memcpy(&lun->mode_pages.caching_page[CTL_PAGE_CURRENT], &lun->mode_pages.caching_page[CTL_PAGE_SAVED], sizeof(caching_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.caching_page; break; } case SMS_CONTROL_MODE_PAGE: { - struct scsi_control_page *control_page; + switch (page_index->subpage) { + case SMS_SUBPAGE_PAGE_0: { + struct scsi_control_page *control_page; - if (page_index->subpage != SMS_SUBPAGE_PAGE_0) - panic("invalid subpage value %d", - page_index->subpage); - - memcpy(&lun->mode_pages.control_page[CTL_PAGE_DEFAULT], - &control_page_default, - sizeof(control_page_default)); - memcpy(&lun->mode_pages.control_page[ - CTL_PAGE_CHANGEABLE], &control_page_changeable, - sizeof(control_page_changeable)); - memcpy(&lun->mode_pages.control_page[CTL_PAGE_SAVED], - &control_page_default, - sizeof(control_page_default)); - control_page = &lun->mode_pages.control_page[ - CTL_PAGE_SAVED]; - value = ctl_get_opt(&lun->be_lun->options, "reordering"); - if (value != NULL && strcmp(value, "unrestricted") == 0) { - control_page->queue_flags &= ~SCP_QUEUE_ALG_MASK; - control_page->queue_flags |= SCP_QUEUE_ALG_UNRESTRICTED; + memcpy(&lun->mode_pages.control_page[ + CTL_PAGE_DEFAULT], + &control_page_default, + sizeof(control_page_default)); + memcpy(&lun->mode_pages.control_page[ + CTL_PAGE_CHANGEABLE], + &control_page_changeable, + sizeof(control_page_changeable)); + memcpy(&lun->mode_pages.control_page[ + CTL_PAGE_SAVED], + &control_page_default, + sizeof(control_page_default)); + control_page = &lun->mode_pages.control_page[ + CTL_PAGE_SAVED]; + value = ctl_get_opt(&lun->be_lun->options, + "reordering"); + if (value != NULL && + strcmp(value, "unrestricted") == 0) { + control_page->queue_flags &= + ~SCP_QUEUE_ALG_MASK; + control_page->queue_flags |= + SCP_QUEUE_ALG_UNRESTRICTED; + } + memcpy(&lun->mode_pages.control_page[ + CTL_PAGE_CURRENT], + &lun->mode_pages.control_page[ + CTL_PAGE_SAVED], + sizeof(control_page_default)); + page_index->page_data = + (uint8_t 
*)lun->mode_pages.control_page; + break; } - memcpy(&lun->mode_pages.control_page[CTL_PAGE_CURRENT], - &lun->mode_pages.control_page[CTL_PAGE_SAVED], - sizeof(control_page_default)); - page_index->page_data = - (uint8_t *)lun->mode_pages.control_page; + case 0x01: + memcpy(&lun->mode_pages.control_ext_page[ + CTL_PAGE_DEFAULT], + &control_ext_page_default, + sizeof(control_ext_page_default)); + memcpy(&lun->mode_pages.control_ext_page[ + CTL_PAGE_CHANGEABLE], + &control_ext_page_changeable, + sizeof(control_ext_page_changeable)); + memcpy(&lun->mode_pages.control_ext_page[ + CTL_PAGE_SAVED], + &control_ext_page_default, + sizeof(control_ext_page_default)); + memcpy(&lun->mode_pages.control_ext_page[ + CTL_PAGE_CURRENT], + &lun->mode_pages.control_ext_page[ + CTL_PAGE_SAVED], + sizeof(control_ext_page_default)); + page_index->page_data = + (uint8_t *)lun->mode_pages.control_ext_page; + break; + } break; - } case SMS_INFO_EXCEPTIONS_PAGE: { switch (page_index->subpage) { case SMS_SUBPAGE_PAGE_0: memcpy(&lun->mode_pages.ie_page[CTL_PAGE_CURRENT], &ie_page_default, sizeof(ie_page_default)); memcpy(&lun->mode_pages.ie_page[ CTL_PAGE_CHANGEABLE], &ie_page_changeable, sizeof(ie_page_changeable)); memcpy(&lun->mode_pages.ie_page[CTL_PAGE_DEFAULT], &ie_page_default, sizeof(ie_page_default)); memcpy(&lun->mode_pages.ie_page[CTL_PAGE_SAVED], &ie_page_default, sizeof(ie_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.ie_page; break; case 0x02: { struct ctl_logical_block_provisioning_page *page; memcpy(&lun->mode_pages.lbp_page[CTL_PAGE_DEFAULT], &lbp_page_default, sizeof(lbp_page_default)); memcpy(&lun->mode_pages.lbp_page[ CTL_PAGE_CHANGEABLE], &lbp_page_changeable, sizeof(lbp_page_changeable)); memcpy(&lun->mode_pages.lbp_page[CTL_PAGE_SAVED], &lbp_page_default, sizeof(lbp_page_default)); page = &lun->mode_pages.lbp_page[CTL_PAGE_SAVED]; value = ctl_get_opt(&lun->be_lun->options, "avail-threshold"); if (value != NULL && ctl_expand_number(value, &ival) == 0) { page->descr[0].flags |= SLBPPD_ENABLED | SLBPPD_ARMING_DEC; if (lun->be_lun->blocksize) ival /= lun->be_lun->blocksize; else ival /= 512; scsi_ulto4b(ival >> CTL_LBP_EXPONENT, page->descr[0].count); } value = ctl_get_opt(&lun->be_lun->options, "used-threshold"); if (value != NULL && ctl_expand_number(value, &ival) == 0) { page->descr[1].flags |= SLBPPD_ENABLED | SLBPPD_ARMING_INC; if (lun->be_lun->blocksize) ival /= lun->be_lun->blocksize; else ival /= 512; scsi_ulto4b(ival >> CTL_LBP_EXPONENT, page->descr[1].count); } value = ctl_get_opt(&lun->be_lun->options, "pool-avail-threshold"); if (value != NULL && ctl_expand_number(value, &ival) == 0) { page->descr[2].flags |= SLBPPD_ENABLED | SLBPPD_ARMING_DEC; if (lun->be_lun->blocksize) ival /= lun->be_lun->blocksize; else ival /= 512; scsi_ulto4b(ival >> CTL_LBP_EXPONENT, page->descr[2].count); } value = ctl_get_opt(&lun->be_lun->options, "pool-used-threshold"); if (value != NULL && ctl_expand_number(value, &ival) == 0) { page->descr[3].flags |= SLBPPD_ENABLED | SLBPPD_ARMING_INC; if (lun->be_lun->blocksize) ival /= lun->be_lun->blocksize; else ival /= 512; scsi_ulto4b(ival >> CTL_LBP_EXPONENT, page->descr[3].count); } memcpy(&lun->mode_pages.lbp_page[CTL_PAGE_CURRENT], &lun->mode_pages.lbp_page[CTL_PAGE_SAVED], sizeof(lbp_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.lbp_page; }} break; } case SMS_VENDOR_SPECIFIC_PAGE:{ switch (page_index->subpage) { case DBGCNF_SUBPAGE_CODE: { struct copan_debugconf_subpage *current_page, *saved_page; 
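		/*
		 * Editor's sketch, not original code: each mode page is
		 * stored as four consecutive instances -- CTL_PAGE_CURRENT,
		 * CTL_PAGE_CHANGEABLE, CTL_PAGE_DEFAULT and CTL_PAGE_SAVED
		 * -- so instance 'pc' of a page starts at
		 * page_data + page_len * pc, exactly as computed for
		 * current_page and saved_page below.
		 */
#if 0
		uint8_t *instance;
		int pc;

		for (pc = 0; pc < 4; pc++)	/* 4 page instances */
			instance = page_index->page_data +
			    (page_index->page_len * pc);
#endif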
memcpy(&lun->mode_pages.debugconf_subpage[ CTL_PAGE_CURRENT], &debugconf_page_default, sizeof(debugconf_page_default)); memcpy(&lun->mode_pages.debugconf_subpage[ CTL_PAGE_CHANGEABLE], &debugconf_page_changeable, sizeof(debugconf_page_changeable)); memcpy(&lun->mode_pages.debugconf_subpage[ CTL_PAGE_DEFAULT], &debugconf_page_default, sizeof(debugconf_page_default)); memcpy(&lun->mode_pages.debugconf_subpage[ CTL_PAGE_SAVED], &debugconf_page_default, sizeof(debugconf_page_default)); page_index->page_data = (uint8_t *)lun->mode_pages.debugconf_subpage; current_page = (struct copan_debugconf_subpage *) (page_index->page_data + (page_index->page_len * CTL_PAGE_CURRENT)); saved_page = (struct copan_debugconf_subpage *) (page_index->page_data + (page_index->page_len * CTL_PAGE_SAVED)); break; } default: panic("invalid subpage value %d", page_index->subpage); break; } break; } default: panic("invalid page value %d", page_index->page_code & SMPH_PC_MASK); break; } } return (CTL_RETVAL_COMPLETE); } static int ctl_init_log_page_index(struct ctl_lun *lun) { struct ctl_page_index *page_index; int i, j, k, prev; memcpy(&lun->log_pages.index, log_page_index_template, sizeof(log_page_index_template)); prev = -1; for (i = 0, j = 0, k = 0; i < CTL_NUM_LOG_PAGES; i++) { page_index = &lun->log_pages.index[i]; /* * If this is a disk-only mode page, there's no point in * setting it up. For some pages, we have to have some * basic information about the disk in order to calculate the * mode page data. */ if ((lun->be_lun->lun_type != T_DIRECT) && (page_index->page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; if (page_index->page_code == SLS_LOGICAL_BLOCK_PROVISIONING && lun->backend->lun_attr == NULL) continue; if (page_index->page_code != prev) { lun->log_pages.pages_page[j] = page_index->page_code; prev = page_index->page_code; j++; } lun->log_pages.subpages_page[k*2] = page_index->page_code; lun->log_pages.subpages_page[k*2+1] = page_index->subpage; k++; } lun->log_pages.index[0].page_data = &lun->log_pages.pages_page[0]; lun->log_pages.index[0].page_len = j; lun->log_pages.index[1].page_data = &lun->log_pages.subpages_page[0]; lun->log_pages.index[1].page_len = k * 2; lun->log_pages.index[2].page_data = &lun->log_pages.lbp_page[0]; lun->log_pages.index[2].page_len = 12*CTL_NUM_LBP_PARAMS; lun->log_pages.index[3].page_data = (uint8_t *)&lun->log_pages.stat_page; lun->log_pages.index[3].page_len = sizeof(lun->log_pages.stat_page); return (CTL_RETVAL_COMPLETE); } static int hex2bin(const char *str, uint8_t *buf, int buf_size) { int i; u_char c; memset(buf, 0, buf_size); while (isspace(str[0])) str++; if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) str += 2; buf_size *= 2; for (i = 0; str[i] != 0 && i < buf_size; i++) { c = str[i]; if (isdigit(c)) c -= '0'; else if (isalpha(c)) c -= isupper(c) ? 'A' - 10 : 'a' - 10; else break; if (c >= 16) break; if ((i & 1) == 0) buf[i / 2] |= (c << 4); else buf[i / 2] |= c; } return ((i + 1) / 2); } /* * LUN allocation. * * Requirements: * - caller allocates and zeros LUN storage, or passes in a NULL LUN if he * wants us to allocate the LUN and he can block. * - ctl_softc is always set * - be_lun is set if the LUN has a backend (needed for disk LUNs) * * Returns 0 for success, non-zero (errno) for failure. 
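 *
 * (Editor's aside: the #if 0 block below is a usage sketch of the
 * hex2bin() helper defined above; it is illustrative only.)
 */
#if 0
	uint8_t id[16];
	int n;

	/* Decode an option string such as the "eui"/"naa" values below. */
	n = hex2bin("0x0102030405060708", id, sizeof(id));
	/* n == 8; id[0] == 0x01 ... id[7] == 0x08; id[8..15] stay zero. */
#endif
/*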
*/ static int ctl_alloc_lun(struct ctl_softc *ctl_softc, struct ctl_lun *ctl_lun, struct ctl_be_lun *const be_lun) { struct ctl_lun *nlun, *lun; struct scsi_vpd_id_descriptor *desc; struct scsi_vpd_id_t10 *t10id; const char *eui, *naa, *scsiname, *vendor; int lun_number, i, lun_malloced; int devidlen, idlen1, idlen2 = 0, len; if (be_lun == NULL) return (EINVAL); /* * We currently only support Direct Access or Processor LUN types. */ switch (be_lun->lun_type) { case T_DIRECT: break; case T_PROCESSOR: break; case T_SEQUENTIAL: case T_CHANGER: default: be_lun->lun_config_status(be_lun->be_lun, CTL_LUN_CONFIG_FAILURE); break; } if (ctl_lun == NULL) { lun = malloc(sizeof(*lun), M_CTL, M_WAITOK); lun_malloced = 1; } else { lun_malloced = 0; lun = ctl_lun; } memset(lun, 0, sizeof(*lun)); if (lun_malloced) lun->flags = CTL_LUN_MALLOCED; /* Generate LUN ID. */ devidlen = max(CTL_DEVID_MIN_LEN, strnlen(be_lun->device_id, CTL_DEVID_LEN)); idlen1 = sizeof(*t10id) + devidlen; len = sizeof(struct scsi_vpd_id_descriptor) + idlen1; scsiname = ctl_get_opt(&be_lun->options, "scsiname"); if (scsiname != NULL) { idlen2 = roundup2(strlen(scsiname) + 1, 4); len += sizeof(struct scsi_vpd_id_descriptor) + idlen2; } eui = ctl_get_opt(&be_lun->options, "eui"); if (eui != NULL) { len += sizeof(struct scsi_vpd_id_descriptor) + 16; } naa = ctl_get_opt(&be_lun->options, "naa"); if (naa != NULL) { len += sizeof(struct scsi_vpd_id_descriptor) + 16; } lun->lun_devid = malloc(sizeof(struct ctl_devid) + len, M_CTL, M_WAITOK | M_ZERO); desc = (struct scsi_vpd_id_descriptor *)lun->lun_devid->data; desc->proto_codeset = SVPD_ID_CODESET_ASCII; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN | SVPD_ID_TYPE_T10; desc->length = idlen1; t10id = (struct scsi_vpd_id_t10 *)&desc->identifier[0]; memset(t10id->vendor, ' ', sizeof(t10id->vendor)); if ((vendor = ctl_get_opt(&be_lun->options, "vendor")) == NULL) { strncpy((char *)t10id->vendor, CTL_VENDOR, sizeof(t10id->vendor)); } else { strncpy(t10id->vendor, vendor, min(sizeof(t10id->vendor), strlen(vendor))); } strncpy((char *)t10id->vendor_spec_id, (char *)be_lun->device_id, devidlen); if (scsiname != NULL) { desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] + desc->length); desc->proto_codeset = SVPD_ID_CODESET_UTF8; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN | SVPD_ID_TYPE_SCSI_NAME; desc->length = idlen2; strlcpy(desc->identifier, scsiname, idlen2); } if (eui != NULL) { desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] + desc->length); desc->proto_codeset = SVPD_ID_CODESET_BINARY; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN | SVPD_ID_TYPE_EUI64; desc->length = hex2bin(eui, desc->identifier, 16); desc->length = desc->length > 12 ? 16 : (desc->length > 8 ? 12 : 8); len -= 16 - desc->length; } if (naa != NULL) { desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] + desc->length); desc->proto_codeset = SVPD_ID_CODESET_BINARY; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_LUN | SVPD_ID_TYPE_NAA; desc->length = hex2bin(naa, desc->identifier, 16); desc->length = desc->length > 8 ? 16 : 8; len -= 16 - desc->length; } lun->lun_devid->len = len; mtx_lock(&ctl_softc->ctl_lock); /* * See if the caller requested a particular LUN number. If so, see * if it is available. Otherwise, allocate the first available LUN. 
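 *
 * (Editor's sketch of the first-fit idea behind ctl_ffz() used below;
 * the uint32_t word layout of ctl_lun_mask is an assumption here.)
 */
#if 0
	int id;

	for (id = 0; id < CTL_MAX_LUNS; id++)
		if ((ctl_softc->ctl_lun_mask[id / 32] &
		    (1u << (id % 32))) == 0)
			break;	/* id is the first free LUN number */
#endif
/*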
*/ if (be_lun->flags & CTL_LUN_FLAG_ID_REQ) { if ((be_lun->req_lun_id > (CTL_MAX_LUNS - 1)) || (ctl_is_set(ctl_softc->ctl_lun_mask, be_lun->req_lun_id))) { mtx_unlock(&ctl_softc->ctl_lock); if (be_lun->req_lun_id > (CTL_MAX_LUNS - 1)) { printf("ctl: requested LUN ID %d is higher " "than CTL_MAX_LUNS - 1 (%d)\n", be_lun->req_lun_id, CTL_MAX_LUNS - 1); } else { /* * XXX KDM return an error, or just assign * another LUN ID in this case?? */ printf("ctl: requested LUN ID %d is already " "in use\n", be_lun->req_lun_id); } if (lun->flags & CTL_LUN_MALLOCED) free(lun, M_CTL); be_lun->lun_config_status(be_lun->be_lun, CTL_LUN_CONFIG_FAILURE); return (ENOSPC); } lun_number = be_lun->req_lun_id; } else { lun_number = ctl_ffz(ctl_softc->ctl_lun_mask, 0, CTL_MAX_LUNS); if (lun_number == -1) { mtx_unlock(&ctl_softc->ctl_lock); printf("ctl: can't allocate LUN, out of LUNs\n"); if (lun->flags & CTL_LUN_MALLOCED) free(lun, M_CTL); be_lun->lun_config_status(be_lun->be_lun, CTL_LUN_CONFIG_FAILURE); return (ENOSPC); } } ctl_set_mask(ctl_softc->ctl_lun_mask, lun_number); mtx_init(&lun->lun_lock, "CTL LUN", NULL, MTX_DEF); lun->lun = lun_number; lun->be_lun = be_lun; /* * The processor LUN is always enabled. Disk LUNs come on line * disabled, and must be enabled by the backend. */ lun->flags |= CTL_LUN_DISABLED; lun->backend = be_lun->be; be_lun->ctl_lun = lun; be_lun->lun_id = lun_number; atomic_add_int(&be_lun->be->num_luns, 1); if (be_lun->flags & CTL_LUN_FLAG_OFFLINE) lun->flags |= CTL_LUN_OFFLINE; if (be_lun->flags & CTL_LUN_FLAG_POWERED_OFF) lun->flags |= CTL_LUN_STOPPED; if (be_lun->flags & CTL_LUN_FLAG_INOPERABLE) lun->flags |= CTL_LUN_INOPERABLE; if (be_lun->flags & CTL_LUN_FLAG_PRIMARY) lun->flags |= CTL_LUN_PRIMARY_SC; lun->ctl_softc = ctl_softc; #ifdef CTL_TIME_IO lun->last_busy = getsbinuptime(); #endif TAILQ_INIT(&lun->ooa_queue); TAILQ_INIT(&lun->blocked_queue); STAILQ_INIT(&lun->error_list); ctl_tpc_lun_init(lun); /* * Initialize the mode and log page index. */ ctl_init_page_index(lun); ctl_init_log_page_index(lun); /* * Now, before we insert this lun on the lun list, set the lun * inventory changed UA for all other luns. */ STAILQ_FOREACH(nlun, &ctl_softc->lun_list, links) { mtx_lock(&nlun->lun_lock); ctl_est_ua_all(nlun, -1, CTL_UA_LUN_CHANGE); mtx_unlock(&nlun->lun_lock); } STAILQ_INSERT_TAIL(&ctl_softc->lun_list, lun, links); ctl_softc->ctl_luns[lun_number] = lun; ctl_softc->num_luns++; /* Setup statistics gathering */ lun->stats.device_type = be_lun->lun_type; lun->stats.lun_number = lun_number; if (lun->stats.device_type == T_DIRECT) lun->stats.blocksize = be_lun->blocksize; else lun->stats.flags = CTL_LUN_STATS_NO_BLOCKSIZE; for (i = 0;i < CTL_MAX_PORTS;i++) lun->stats.ports[i].targ_port = i; mtx_unlock(&ctl_softc->ctl_lock); lun->be_lun->lun_config_status(lun->be_lun->be_lun, CTL_LUN_CONFIG_OK); return (0); } /* * Delete a LUN. * Assumptions: * - LUN has already been marked invalid and any pending I/O has been taken * care of. */ static int ctl_free_lun(struct ctl_lun *lun) { struct ctl_softc *softc; struct ctl_lun *nlun; int i; softc = lun->ctl_softc; mtx_assert(&softc->ctl_lock, MA_OWNED); STAILQ_REMOVE(&softc->lun_list, lun, ctl_lun, links); ctl_clear_mask(softc->ctl_lun_mask, lun->lun); softc->ctl_luns[lun->lun] = NULL; if (!TAILQ_EMPTY(&lun->ooa_queue)) panic("Freeing a LUN %p with outstanding I/O!!\n", lun); softc->num_luns--; /* * Tell the backend to free resources, if this LUN has a backend. 
*/ atomic_subtract_int(&lun->be_lun->be->num_luns, 1); lun->be_lun->lun_shutdown(lun->be_lun->be_lun); ctl_tpc_lun_shutdown(lun); mtx_destroy(&lun->lun_lock); free(lun->lun_devid, M_CTL); for (i = 0; i < CTL_MAX_PORTS; i++) free(lun->pending_ua[i], M_CTL); for (i = 0; i < CTL_MAX_PORTS; i++) free(lun->pr_keys[i], M_CTL); free(lun->write_buffer, M_CTL); if (lun->flags & CTL_LUN_MALLOCED) free(lun, M_CTL); STAILQ_FOREACH(nlun, &softc->lun_list, links) { mtx_lock(&nlun->lun_lock); ctl_est_ua_all(nlun, -1, CTL_UA_LUN_CHANGE); mtx_unlock(&nlun->lun_lock); } return (0); } static void ctl_create_lun(struct ctl_be_lun *be_lun) { struct ctl_softc *softc; softc = control_softc; /* * ctl_alloc_lun() should handle all potential failure cases. */ ctl_alloc_lun(softc, NULL, be_lun); } int ctl_add_lun(struct ctl_be_lun *be_lun) { struct ctl_softc *softc = control_softc; mtx_lock(&softc->ctl_lock); STAILQ_INSERT_TAIL(&softc->pending_lun_queue, be_lun, links); mtx_unlock(&softc->ctl_lock); wakeup(&softc->pending_lun_queue); return (0); } int ctl_enable_lun(struct ctl_be_lun *be_lun) { struct ctl_softc *softc; struct ctl_port *port, *nport; struct ctl_lun *lun; int retval; lun = (struct ctl_lun *)be_lun->ctl_lun; softc = lun->ctl_softc; mtx_lock(&softc->ctl_lock); mtx_lock(&lun->lun_lock); if ((lun->flags & CTL_LUN_DISABLED) == 0) { /* * eh? Why did we get called if the LUN is already * enabled? */ mtx_unlock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); return (0); } lun->flags &= ~CTL_LUN_DISABLED; mtx_unlock(&lun->lun_lock); for (port = STAILQ_FIRST(&softc->port_list); port != NULL; port = nport) { nport = STAILQ_NEXT(port, links); if ((port->status & CTL_PORT_STATUS_ONLINE) == 0 || port->lun_map != NULL || port->lun_enable == NULL) continue; /* * Drop the lock while we call the FETD's enable routine. * This can lead to a callback into CTL (at least in the * case of the internal initiator frontend. */ mtx_unlock(&softc->ctl_lock); retval = port->lun_enable(port->targ_lun_arg, lun->lun); mtx_lock(&softc->ctl_lock); if (retval != 0) { printf("%s: FETD %s port %d returned error " "%d for lun_enable on lun %jd\n", __func__, port->port_name, port->targ_port, retval, (intmax_t)lun->lun); } } mtx_unlock(&softc->ctl_lock); ctl_isc_announce_lun(lun); return (0); } int ctl_disable_lun(struct ctl_be_lun *be_lun) { struct ctl_softc *softc; struct ctl_port *port; struct ctl_lun *lun; int retval; lun = (struct ctl_lun *)be_lun->ctl_lun; softc = lun->ctl_softc; mtx_lock(&softc->ctl_lock); mtx_lock(&lun->lun_lock); if (lun->flags & CTL_LUN_DISABLED) { mtx_unlock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); return (0); } lun->flags |= CTL_LUN_DISABLED; mtx_unlock(&lun->lun_lock); STAILQ_FOREACH(port, &softc->port_list, links) { if ((port->status & CTL_PORT_STATUS_ONLINE) == 0 || port->lun_map != NULL || port->lun_disable == NULL) continue; /* * Drop the lock before we call the frontend's disable * routine, to avoid lock order reversals. * * XXX KDM what happens if the frontend list changes while * we're traversing it? It's unlikely, but should be handled. 
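 *
 * (Editor's aside: ctl_enable_lun() above snapshots STAILQ_NEXT()
 * into nport before its body runs, but neither loop protects the
 * saved pointer across the mtx_unlock()/mtx_lock() window, which is
 * the hazard this XXX is flagging.)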
*/ mtx_unlock(&softc->ctl_lock); retval = port->lun_disable(port->targ_lun_arg, lun->lun); mtx_lock(&softc->ctl_lock); if (retval != 0) { printf("%s: FETD %s port %d returned error " "%d for lun_disable on lun %jd\n", __func__, port->port_name, port->targ_port, retval, (intmax_t)lun->lun); } } mtx_unlock(&softc->ctl_lock); ctl_isc_announce_lun(lun); return (0); } int ctl_start_lun(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags &= ~CTL_LUN_STOPPED; mtx_unlock(&lun->lun_lock); return (0); } int ctl_stop_lun(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags |= CTL_LUN_STOPPED; mtx_unlock(&lun->lun_lock); return (0); } int ctl_lun_offline(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags |= CTL_LUN_OFFLINE; mtx_unlock(&lun->lun_lock); return (0); } int ctl_lun_online(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags &= ~CTL_LUN_OFFLINE; mtx_unlock(&lun->lun_lock); return (0); } int ctl_lun_primary(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags |= CTL_LUN_PRIMARY_SC; ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE); mtx_unlock(&lun->lun_lock); ctl_isc_announce_lun(lun); return (0); } int ctl_lun_secondary(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags &= ~CTL_LUN_PRIMARY_SC; ctl_est_ua_all(lun, -1, CTL_UA_ASYM_ACC_CHANGE); mtx_unlock(&lun->lun_lock); ctl_isc_announce_lun(lun); return (0); } int ctl_invalidate_lun(struct ctl_be_lun *be_lun) { struct ctl_softc *softc; struct ctl_lun *lun; lun = (struct ctl_lun *)be_lun->ctl_lun; softc = lun->ctl_softc; mtx_lock(&lun->lun_lock); /* * The LUN needs to be disabled before it can be marked invalid. */ if ((lun->flags & CTL_LUN_DISABLED) == 0) { mtx_unlock(&lun->lun_lock); return (-1); } /* * Mark the LUN invalid. */ lun->flags |= CTL_LUN_INVALID; /* * If there is nothing in the OOA queue, go ahead and free the LUN. * If we have something in the OOA queue, we'll free it when the * last I/O completes. */ if (TAILQ_EMPTY(&lun->ooa_queue)) { mtx_unlock(&lun->lun_lock); mtx_lock(&softc->ctl_lock); ctl_free_lun(lun); mtx_unlock(&softc->ctl_lock); } else mtx_unlock(&lun->lun_lock); return (0); } int ctl_lun_inoperable(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags |= CTL_LUN_INOPERABLE; mtx_unlock(&lun->lun_lock); return (0); } int ctl_lun_operable(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; mtx_lock(&lun->lun_lock); lun->flags &= ~CTL_LUN_INOPERABLE; mtx_unlock(&lun->lun_lock); return (0); } void ctl_lun_capacity_changed(struct ctl_be_lun *be_lun) { struct ctl_lun *lun = (struct ctl_lun *)be_lun->ctl_lun; union ctl_ha_msg msg; mtx_lock(&lun->lun_lock); ctl_est_ua_all(lun, -1, CTL_UA_CAPACITY_CHANGED); mtx_unlock(&lun->lun_lock); if (lun->ctl_softc->ha_mode == CTL_HA_MODE_XFER) { /* Send msg to other side. 
*/ bzero(&msg.ua, sizeof(msg.ua)); msg.hdr.msg_type = CTL_MSG_UA; msg.hdr.nexus.initid = -1; msg.hdr.nexus.targ_port = -1; msg.hdr.nexus.targ_lun = lun->lun; msg.hdr.nexus.targ_mapped_lun = lun->lun; msg.ua.ua_all = 1; msg.ua.ua_set = 1; msg.ua.ua_type = CTL_UA_CAPACITY_CHANGED; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg.ua), M_WAITOK); } } /* * Backend "memory move is complete" callback for requests that never * make it down to say RAIDCore's configuration code. */ int ctl_config_move_done(union ctl_io *io) { int retval; CTL_DEBUG_PRINT(("ctl_config_move_done\n")); KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Config I/O type isn't CTL_IO_SCSI (%d)!", io->io_hdr.io_type)); if ((io->io_hdr.port_status != 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { /* * For hardware error sense keys, the sense key * specific value is defined to be a retry count, * but we use it to pass back an internal FETD * error code. XXX KDM Hopefully the FETD is only * using 16 bits for an error code, since that's * all the space we have in the sks field. */ ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ io->io_hdr.port_status); } if (ctl_debug & CTL_DEBUG_CDB_DATA) ctl_data_print(io); if (((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)) { /* * XXX KDM just assuming a single pointer here, and not a * S/G list. If we start using S/G lists for config data, * we'll need to know how to clean them up here as well. */ if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) free(io->scsiio.kern_data_ptr, M_CTL); ctl_done(io); retval = CTL_RETVAL_COMPLETE; } else { /* * XXX KDM now we need to continue data movement. Some * options: * - call ctl_scsiio() again? We don't do this for data * writes, because for those at least we know ahead of * time where the write will go and how long it is. For * config writes, though, that information is largely * contained within the write itself, thus we need to * parse out the data again. * * - Call some other function once the data is in? */ /* * XXX KDM call ctl_scsiio() again for now, and check flag * bits to see whether we're allocated or not. */ retval = ctl_scsiio(&io->scsiio); } return (retval); } /* * This gets called by a backend driver when it is done with a * data_submit method. */ void ctl_data_submit_done(union ctl_io *io) { /* * If the IO_CONT flag is set, we need to call the supplied * function to continue processing the I/O, instead of completing * the I/O just yet. * * If there is an error, though, we don't want to keep processing. * Instead, just send status back to the initiator. */ if ((io->io_hdr.flags & CTL_FLAG_IO_CONT) && (io->io_hdr.flags & CTL_FLAG_ABORT) == 0 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { io->scsiio.io_cont(io); return; } ctl_done(io); } /* * This gets called by a backend driver when it is done with a * configuration write. */ void ctl_config_write_done(union ctl_io *io) { uint8_t *buf; /* * If the IO_CONT flag is set, we need to call the supplied * function to continue processing the I/O, instead of completing * the I/O just yet. * * If there is an error, though, we don't want to keep processing. * Instead, just send status back to the initiator. 
*/ if ((io->io_hdr.flags & CTL_FLAG_IO_CONT) && (io->io_hdr.flags & CTL_FLAG_ABORT) == 0 && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { io->scsiio.io_cont(io); return; } /* * Since a configuration write can be done for commands that actually * have data allocated, like write buffer, and commands that have * no data, like start/stop unit, we need to check here. */ if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) buf = io->scsiio.kern_data_ptr; else buf = NULL; ctl_done(io); if (buf) free(buf, M_CTL); } void ctl_config_read_done(union ctl_io *io) { uint8_t *buf; /* * If there is some error -- we are done, skip data transfer. */ if ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { if (io->io_hdr.flags & CTL_FLAG_ALLOCATED) buf = io->scsiio.kern_data_ptr; else buf = NULL; ctl_done(io); if (buf) free(buf, M_CTL); return; } /* * If the IO_CONT flag is set, we need to call the supplied * function to continue processing the I/O, instead of completing * the I/O just yet. */ if (io->io_hdr.flags & CTL_FLAG_IO_CONT) { io->scsiio.io_cont(io); return; } ctl_datamove(io); } /* * SCSI release command. */ int ctl_scsi_release(struct ctl_scsiio *ctsio) { int length, longid, thirdparty_id, resv_id; struct ctl_lun *lun; uint32_t residx; length = 0; resv_id = 0; CTL_DEBUG_PRINT(("ctl_scsi_release\n")); residx = ctl_get_initindex(&ctsio->io_hdr.nexus); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; switch (ctsio->cdb[0]) { case RELEASE_10: { struct scsi_release_10 *cdb; cdb = (struct scsi_release_10 *)ctsio->cdb; if (cdb->byte2 & SR10_LONGID) longid = 1; else thirdparty_id = cdb->thirdparty_id; resv_id = cdb->resv_id; length = scsi_2btoul(cdb->length); break; } } /* * XXX KDM right now, we only support LUN reservation. We don't * support 3rd party reservations, or extent reservations, which * might actually need the parameter list. If we've gotten this * far, we've got a LUN reservation. Anything else got kicked out * above. So, according to SPC, ignore the length. */ length = 0; if (((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) && (length > 0)) { ctsio->kern_data_ptr = malloc(length, M_CTL, M_WAITOK); ctsio->kern_data_len = length; ctsio->kern_total_len = length; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if (length > 0) thirdparty_id = scsi_8btou64(ctsio->kern_data_ptr); mtx_lock(&lun->lun_lock); /* * According to SPC, it is not an error for an initiator to attempt * to release a reservation on a LUN that isn't reserved, or that * is reserved by another initiator. The reservation can only be * released, though, by the initiator who made it or by one of * several reset type events.
*/ if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx == residx)) lun->flags &= ~CTL_LUN_RESERVED; mtx_unlock(&lun->lun_lock); if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) { free(ctsio->kern_data_ptr, M_CTL); ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED; } ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_scsi_reserve(struct ctl_scsiio *ctsio) { int extent, thirdparty, longid; int resv_id, length; uint64_t thirdparty_id; struct ctl_lun *lun; uint32_t residx; extent = 0; thirdparty = 0; longid = 0; resv_id = 0; length = 0; thirdparty_id = 0; CTL_DEBUG_PRINT(("ctl_reserve\n")); residx = ctl_get_initindex(&ctsio->io_hdr.nexus); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; switch (ctsio->cdb[0]) { case RESERVE_10: { struct scsi_reserve_10 *cdb; cdb = (struct scsi_reserve_10 *)ctsio->cdb; if (cdb->byte2 & SR10_LONGID) longid = 1; else thirdparty_id = cdb->thirdparty_id; resv_id = cdb->resv_id; length = scsi_2btoul(cdb->length); break; } } /* * XXX KDM right now, we only support LUN reservation. We don't * support 3rd party reservations, or extent reservations, which * might actually need the parameter list. If we've gotten this * far, we've got a LUN reservation. Anything else got kicked out * above. So, according to SPC, ignore the length. */ length = 0; if (((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) && (length > 0)) { ctsio->kern_data_ptr = malloc(length, M_CTL, M_WAITOK); ctsio->kern_data_len = length; ctsio->kern_total_len = length; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if (length > 0) thirdparty_id = scsi_8btou64(ctsio->kern_data_ptr); mtx_lock(&lun->lun_lock); if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx != residx)) { ctl_set_reservation_conflict(ctsio); goto bailout; } lun->flags |= CTL_LUN_RESERVED; lun->res_idx = residx; ctl_set_success(ctsio); bailout: mtx_unlock(&lun->lun_lock); if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) { free(ctsio->kern_data_ptr, M_CTL); ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED; } ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_start_stop(struct ctl_scsiio *ctsio) { struct scsi_start_stop_unit *cdb; struct ctl_lun *lun; int retval; CTL_DEBUG_PRINT(("ctl_start_stop\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; retval = 0; cdb = (struct scsi_start_stop_unit *)ctsio->cdb; /* * XXX KDM * We don't support the immediate bit on a stop unit. In order to * do that, we would need to code up a way to know that a stop is * pending, and hold off any new commands until it completes, one * way or another. Then we could accept or reject those commands * depending on its status. We would almost need to do the reverse * of what we do below for an immediate start -- return the copy of * the ctl_io to the FETD with status to send to the host (and to * free the copy!) and then free the original I/O once the stop * actually completes. That way, the OOA queue mechanism can work * to block commands that shouldn't proceed. Another alternative * would be to put the copy in the queue in place of the original, * and return the original back to the caller. That could be * slightly safer.. 
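 *
 * (Editor's note: the immediate *start* path below already follows
 * this scheme -- ctl_alloc_io()/ctl_copy_io() hand a copy to the
 * backend's config_write() while the original I/O is completed back
 * to the initiator right away.)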
*/ if ((cdb->byte2 & SSS_IMMED) && ((cdb->how & SSS_START) == 0)) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if ((lun->flags & CTL_LUN_PR_RESERVED) && ((cdb->how & SSS_START)==0)) { uint32_t residx; residx = ctl_get_initindex(&ctsio->io_hdr.nexus); if (ctl_get_prkey(lun, residx) == 0 || (lun->pr_res_idx!=residx && lun->res_type < 4)) { ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } } /* * If there is no backend on this device, we can't start or stop * it. In theory we shouldn't get any start/stop commands in the * first place at this level if the LUN doesn't have a backend. * That should get stopped by the command decode code. */ if (lun->backend == NULL) { ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * XXX KDM Copan-specific offline behavior. * Figure out a reasonable way to port this? */ #ifdef NEEDTOPORT mtx_lock(&lun->lun_lock); if (((cdb->byte2 & SSS_ONOFFLINE) == 0) && (lun->flags & CTL_LUN_OFFLINE)) { /* * If the LUN is offline, and the on/offline bit isn't set, * reject the start or stop. Otherwise, let it through. */ mtx_unlock(&lun->lun_lock); ctl_set_lun_not_ready(ctsio); ctl_done((union ctl_io *)ctsio); } else { mtx_unlock(&lun->lun_lock); #endif /* NEEDTOPORT */ /* * This could be a start or a stop when we're online, * or a stop/offline or start/online. A start or stop when * we're offline is covered in the case above. */ /* * In the non-immediate case, we send the request to * the backend and return status to the user when * it is done. * * In the immediate case, we allocate a new ctl_io * to hold a copy of the request, and send that to * the backend. We then set good status on the * user's request and return it immediately. */ if (cdb->byte2 & SSS_IMMED) { union ctl_io *new_io; new_io = ctl_alloc_io(ctsio->io_hdr.pool); ctl_copy_io((union ctl_io *)ctsio, new_io); retval = lun->backend->config_write(new_io); ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); } else { retval = lun->backend->config_write( (union ctl_io *)ctsio); } #ifdef NEEDTOPORT } #endif return (retval); } /* * We support the SYNCHRONIZE CACHE command (10 and 16 byte versions), but * we don't really do anything with the LBA and length fields if the user * passes them in. Instead we'll just flush out the cache for the entire * LUN. */ int ctl_sync_cache(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct ctl_softc *softc; struct ctl_lba_len_flags *lbalen; uint64_t starting_lba; uint32_t block_count; int retval; uint8_t byte2; CTL_DEBUG_PRINT(("ctl_sync_cache\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; softc = lun->ctl_softc; retval = 0; switch (ctsio->cdb[0]) { case SYNCHRONIZE_CACHE: { struct scsi_sync_cache *cdb; cdb = (struct scsi_sync_cache *)ctsio->cdb; starting_lba = scsi_4btoul(cdb->begin_lba); block_count = scsi_2btoul(cdb->lb_count); byte2 = cdb->byte2; break; } case SYNCHRONIZE_CACHE_16: { struct scsi_sync_cache_16 *cdb; cdb = (struct scsi_sync_cache_16 *)ctsio->cdb; starting_lba = scsi_8btou64(cdb->begin_lba); block_count = scsi_4btoul(cdb->lb_count); byte2 = cdb->byte2; break; } default: ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); goto bailout; break; /* NOTREACHED */ } /* * We check the LBA and length, but don't do anything with them. 
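 * (Editor's arithmetic example of the bounds test below: with
 * maxlba == 999999, starting_lba == 999990 and block_count == 10 sum
 * to 1000000, which does not exceed maxlba + 1 and passes, while
 * block_count == 11 sums to 1000001 and is rejected.)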
* A SYNCHRONIZE CACHE will cause the entire cache for this lun to * get flushed. This check will just help satisfy anyone who wants * to see an error for an out of range LBA. */ if ((starting_lba + block_count) > (lun->be_lun->maxlba + 1)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); goto bailout; } /* * If this LUN has no backend, we can't flush the cache anyway. */ if (lun->backend == NULL) { ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); goto bailout; } lbalen = (struct ctl_lba_len_flags *)&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = starting_lba; lbalen->len = block_count; lbalen->flags = byte2; /* * Check to see whether we're configured to send the SYNCHRONIZE * CACHE command directly to the back end. */ mtx_lock(&lun->lun_lock); if ((softc->flags & CTL_FLAG_REAL_SYNC) && (++(lun->sync_count) >= lun->sync_interval)) { lun->sync_count = 0; mtx_unlock(&lun->lun_lock); retval = lun->backend->config_write((union ctl_io *)ctsio); } else { mtx_unlock(&lun->lun_lock); ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); } bailout: return (retval); } int ctl_format(struct ctl_scsiio *ctsio) { struct scsi_format *cdb; struct ctl_lun *lun; int length, defect_list_len; CTL_DEBUG_PRINT(("ctl_format\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_format *)ctsio->cdb; length = 0; if (cdb->byte2 & SF_FMTDATA) { if (cdb->byte2 & SF_LONGLIST) length = sizeof(struct scsi_format_header_long); else length = sizeof(struct scsi_format_header_short); } if (((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) && (length > 0)) { ctsio->kern_data_ptr = malloc(length, M_CTL, M_WAITOK); ctsio->kern_data_len = length; ctsio->kern_total_len = length; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } defect_list_len = 0; if (cdb->byte2 & SF_FMTDATA) { if (cdb->byte2 & SF_LONGLIST) { struct scsi_format_header_long *header; header = (struct scsi_format_header_long *) ctsio->kern_data_ptr; defect_list_len = scsi_4btoul(header->defect_list_len); if (defect_list_len != 0) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); goto bailout; } } else { struct scsi_format_header_short *header; header = (struct scsi_format_header_short *) ctsio->kern_data_ptr; defect_list_len = scsi_2btoul(header->defect_list_len); if (defect_list_len != 0) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); goto bailout; } } } /* * The format command will clear out the "Medium format corrupted" * status if set by the configuration code. That status is really * just a way to notify the host that we have lost the media, and * get them to issue a command that will basically make them think * they're blowing away the media. 
*/ mtx_lock(&lun->lun_lock); lun->flags &= ~CTL_LUN_INOPERABLE; mtx_unlock(&lun->lun_lock); ctl_set_success(ctsio); bailout: if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) { free(ctsio->kern_data_ptr, M_CTL); ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED; } ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_read_buffer(struct ctl_scsiio *ctsio) { struct scsi_read_buffer *cdb; struct ctl_lun *lun; int buffer_offset, len; static uint8_t descr[4]; static uint8_t echo_descr[4] = { 0 }; CTL_DEBUG_PRINT(("ctl_read_buffer\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_read_buffer *)ctsio->cdb; if ((cdb->byte2 & RWB_MODE) != RWB_MODE_DATA && (cdb->byte2 & RWB_MODE) != RWB_MODE_ECHO_DESCR && (cdb->byte2 & RWB_MODE) != RWB_MODE_DESCR) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 4); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } len = scsi_3btoul(cdb->length); buffer_offset = scsi_3btoul(cdb->offset); if (buffer_offset + len > CTL_WRITE_BUFFER_SIZE) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 6, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if ((cdb->byte2 & RWB_MODE) == RWB_MODE_DESCR) { descr[0] = 0; scsi_ulto3b(CTL_WRITE_BUFFER_SIZE, &descr[1]); ctsio->kern_data_ptr = descr; len = min(len, sizeof(descr)); } else if ((cdb->byte2 & RWB_MODE) == RWB_MODE_ECHO_DESCR) { ctsio->kern_data_ptr = echo_descr; len = min(len, sizeof(echo_descr)); } else { if (lun->write_buffer == NULL) { lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE, M_CTL, M_WAITOK); } ctsio->kern_data_ptr = lun->write_buffer + buffer_offset; } ctsio->kern_data_len = len; ctsio->kern_total_len = len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctl_set_success(ctsio); ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_write_buffer(struct ctl_scsiio *ctsio) { struct scsi_write_buffer *cdb; struct ctl_lun *lun; int buffer_offset, len; CTL_DEBUG_PRINT(("ctl_write_buffer\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_write_buffer *)ctsio->cdb; if ((cdb->byte2 & RWB_MODE) != RWB_MODE_DATA) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 4); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } len = scsi_3btoul(cdb->length); buffer_offset = scsi_3btoul(cdb->offset); if (buffer_offset + len > CTL_WRITE_BUFFER_SIZE) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 6, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * If we've got a kernel request that hasn't been malloced yet, * malloc it and tell the caller the data buffer is here. 
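 *
 * (Editor's note: this is the two-pass config-command pattern used
 * throughout this file; the #if 0 block below is a generic sketch,
 * not verbatim CTL code.)
 */
#if 0
	if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) {
		/* Pass 1: allocate, start the data movement, return. */
		ctsio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK);
		ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED;
		ctsio->be_move_done = ctl_config_move_done;
		ctl_datamove((union ctl_io *)ctsio);
		return (CTL_RETVAL_COMPLETE);
	}
	/* Pass 2, re-entered via ctl_config_move_done(): use the data. */
#endif
/*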
*/ if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { if (lun->write_buffer == NULL) { lun->write_buffer = malloc(CTL_WRITE_BUFFER_SIZE, M_CTL, M_WAITOK); } ctsio->kern_data_ptr = lun->write_buffer + buffer_offset; ctsio->kern_data_len = len; ctsio->kern_total_len = len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_write_same(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; uint64_t lba; uint32_t num_blocks; int len, retval; uint8_t byte2; retval = CTL_RETVAL_COMPLETE; CTL_DEBUG_PRINT(("ctl_write_same\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; switch (ctsio->cdb[0]) { case WRITE_SAME_10: { struct scsi_write_same_10 *cdb; cdb = (struct scsi_write_same_10 *)ctsio->cdb; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_2btoul(cdb->length); byte2 = cdb->byte2; break; } case WRITE_SAME_16: { struct scsi_write_same_16 *cdb; cdb = (struct scsi_write_same_16 *)ctsio->cdb; lba = scsi_8btou64(cdb->addr); num_blocks = scsi_4btoul(cdb->length); byte2 = cdb->byte2; break; } default: /* * We got a command we don't support. This shouldn't * happen, commands should be filtered out above us. */ ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); break; /* NOTREACHED */ } /* NDOB and ANCHOR flags can be used only together with UNMAP */ if ((byte2 & SWS_UNMAP) == 0 && (byte2 & (SWS_NDOB | SWS_ANCHOR)) != 0) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * The first check is to make sure we're in bounds, the second * check is to catch wrap-around problems. If the lba + num blocks * is less than the lba, then we've wrapped around and the block * range is invalid anyway. */ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) || ((lba + num_blocks) < lba)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* Zero number of blocks means "to the last logical block" */ if (num_blocks == 0) { if ((lun->be_lun->maxlba + 1) - lba > UINT32_MAX) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 0, /*command*/ 1, /*field*/ 0, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } num_blocks = (lun->be_lun->maxlba + 1) - lba; } len = lun->be_lun->blocksize; /* * If we've got a kernel request that hasn't been malloced yet, * malloc it and tell the caller the data buffer is here. 
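 *
 * (Editor's worked example for the WRITE SAME range logic above: with
 * maxlba == 0xffff, lba == 0xfff0 and num_blocks == 0x11 give
 * lba + num_blocks == 0x10001 > maxlba + 1, so the request is
 * rejected; lba == 0xffffffffffffffff with num_blocks == 2 wraps to
 * lba + num_blocks == 1 < lba and is caught by the second test; and
 * num_blocks == 0 expands to maxlba + 1 - lba, i.e. "through the
 * last block".)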
*/ if ((byte2 & SWS_NDOB) == 0 && (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { ctsio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK); ctsio->kern_data_len = len; ctsio->kern_total_len = len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } lbalen = (struct ctl_lba_len_flags *)&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = lba; lbalen->len = num_blocks; lbalen->flags = byte2; retval = lun->backend->config_write((union ctl_io *)ctsio); return (retval); } int ctl_unmap(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct scsi_unmap *cdb; struct ctl_ptr_len_flags *ptrlen; struct scsi_unmap_header *hdr; struct scsi_unmap_desc *buf, *end, *endnz, *range; uint64_t lba; uint32_t num_blocks; int len, retval; uint8_t byte2; retval = CTL_RETVAL_COMPLETE; CTL_DEBUG_PRINT(("ctl_unmap\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_unmap *)ctsio->cdb; len = scsi_2btoul(cdb->length); byte2 = cdb->byte2; /* * If we've got a kernel request that hasn't been malloced yet, * malloc it and tell the caller the data buffer is here. */ if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { ctsio->kern_data_ptr = malloc(len, M_CTL, M_WAITOK); ctsio->kern_data_len = len; ctsio->kern_total_len = len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } len = ctsio->kern_total_len - ctsio->kern_data_resid; hdr = (struct scsi_unmap_header *)ctsio->kern_data_ptr; if (len < sizeof (*hdr) || len < (scsi_2btoul(hdr->length) + sizeof(hdr->length)) || len < (scsi_2btoul(hdr->desc_length) + sizeof (*hdr)) || scsi_2btoul(hdr->desc_length) % sizeof(*buf) != 0) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 0, /*command*/ 0, /*field*/ 0, /*bit_valid*/ 0, /*bit*/ 0); goto done; } len = scsi_2btoul(hdr->desc_length); buf = (struct scsi_unmap_desc *)(hdr + 1); end = buf + len / sizeof(*buf); endnz = buf; for (range = buf; range < end; range++) { lba = scsi_8btou64(range->lba); num_blocks = scsi_4btoul(range->length); if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) || ((lba + num_blocks) < lba)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if (num_blocks != 0) endnz = range + 1; } /* * Block backend cannot handle zero last range. * Filter it out and return if there is nothing left. */ len = (uint8_t *)endnz - (uint8_t *)buf; if (len == 0) { ctl_set_success(ctsio); goto done; } mtx_lock(&lun->lun_lock); ptrlen = (struct ctl_ptr_len_flags *) &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; ptrlen->ptr = (void *)buf; ptrlen->len = len; ptrlen->flags = byte2; ctl_check_blocked(lun); mtx_unlock(&lun->lun_lock); retval = lun->backend->config_write((union ctl_io *)ctsio); return (retval); done: if (ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) { free(ctsio->kern_data_ptr, M_CTL); ctsio->io_hdr.flags &= ~CTL_FLAG_ALLOCATED; } ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Note that this function currently doesn't actually do anything inside * CTL to enforce things if the DQue bit is turned on.
* * Also note that this function can't be used in the default case, because * the DQue bit isn't set in the changeable mask for the control mode page * anyway. This is just here as an example for how to implement a page * handler, and a placeholder in case we want to allow the user to turn * tagged queueing on and off. * * The D_SENSE bit handling is functional, however, and will turn * descriptor sense on and off for a given LUN. */ int ctl_control_page_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, uint8_t *page_ptr) { struct scsi_control_page *current_cp, *saved_cp, *user_cp; struct ctl_lun *lun; int set_ua; uint32_t initidx; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; initidx = ctl_get_initindex(&ctsio->io_hdr.nexus); set_ua = 0; user_cp = (struct scsi_control_page *)page_ptr; current_cp = (struct scsi_control_page *) (page_index->page_data + (page_index->page_len * CTL_PAGE_CURRENT)); saved_cp = (struct scsi_control_page *) (page_index->page_data + (page_index->page_len * CTL_PAGE_SAVED)); mtx_lock(&lun->lun_lock); if (((current_cp->rlec & SCP_DSENSE) == 0) && ((user_cp->rlec & SCP_DSENSE) != 0)) { /* * Descriptor sense is currently turned off and the user * wants to turn it on. */ current_cp->rlec |= SCP_DSENSE; saved_cp->rlec |= SCP_DSENSE; lun->flags |= CTL_LUN_SENSE_DESC; set_ua = 1; } else if (((current_cp->rlec & SCP_DSENSE) != 0) && ((user_cp->rlec & SCP_DSENSE) == 0)) { /* * Descriptor sense is currently turned on, and the user * wants to turn it off. */ current_cp->rlec &= ~SCP_DSENSE; saved_cp->rlec &= ~SCP_DSENSE; lun->flags &= ~CTL_LUN_SENSE_DESC; set_ua = 1; } if ((current_cp->queue_flags & SCP_QUEUE_ALG_MASK) != (user_cp->queue_flags & SCP_QUEUE_ALG_MASK)) { current_cp->queue_flags &= ~SCP_QUEUE_ALG_MASK; current_cp->queue_flags |= user_cp->queue_flags & SCP_QUEUE_ALG_MASK; saved_cp->queue_flags &= ~SCP_QUEUE_ALG_MASK; saved_cp->queue_flags |= user_cp->queue_flags & SCP_QUEUE_ALG_MASK; set_ua = 1; } if ((current_cp->eca_and_aen & SCP_SWP) != (user_cp->eca_and_aen & SCP_SWP)) { current_cp->eca_and_aen &= ~SCP_SWP; current_cp->eca_and_aen |= user_cp->eca_and_aen & SCP_SWP; saved_cp->eca_and_aen &= ~SCP_SWP; saved_cp->eca_and_aen |= user_cp->eca_and_aen & SCP_SWP; set_ua = 1; } if (set_ua != 0) ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE); mtx_unlock(&lun->lun_lock); return (0); } int ctl_caching_sp_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, uint8_t *page_ptr) { struct scsi_caching_page *current_cp, *saved_cp, *user_cp; struct ctl_lun *lun; int set_ua; uint32_t initidx; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; initidx = ctl_get_initindex(&ctsio->io_hdr.nexus); set_ua = 0; user_cp = (struct scsi_caching_page *)page_ptr; current_cp = (struct scsi_caching_page *) (page_index->page_data + (page_index->page_len * CTL_PAGE_CURRENT)); saved_cp = (struct scsi_caching_page *) (page_index->page_data + (page_index->page_len * CTL_PAGE_SAVED)); mtx_lock(&lun->lun_lock); if ((current_cp->flags1 & (SCP_WCE | SCP_RCD)) != (user_cp->flags1 & (SCP_WCE | SCP_RCD))) { current_cp->flags1 &= ~(SCP_WCE | SCP_RCD); current_cp->flags1 |= user_cp->flags1 & (SCP_WCE | SCP_RCD); saved_cp->flags1 &= ~(SCP_WCE | SCP_RCD); saved_cp->flags1 |= user_cp->flags1 & (SCP_WCE | SCP_RCD); set_ua = 1; } if (set_ua != 0) ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE); mtx_unlock(&lun->lun_lock); return (0); } int ctl_debugconf_sp_select_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, 
uint8_t *page_ptr) { uint8_t *c; int i; c = ((struct copan_debugconf_subpage *)page_ptr)->ctl_time_io_secs; ctl_time_io_secs = (c[0] << 8) | (c[1] << 0) | 0; CTL_DEBUG_PRINT(("set ctl_time_io_secs to %d\n", ctl_time_io_secs)); printf("set ctl_time_io_secs to %d\n", ctl_time_io_secs); printf("page data:"); for (i=0; i<8; i++) printf(" %.2x",page_ptr[i]); printf("\n"); return (0); } int ctl_debugconf_sp_sense_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, int pc) { struct copan_debugconf_subpage *page; page = (struct copan_debugconf_subpage *)(page_index->page_data + (page_index->page_len * pc)); switch (pc) { case SMS_PAGE_CTRL_CHANGEABLE >> 6: case SMS_PAGE_CTRL_DEFAULT >> 6: case SMS_PAGE_CTRL_SAVED >> 6: /* * We don't update the changeable or default bits for this page. */ break; case SMS_PAGE_CTRL_CURRENT >> 6: page->ctl_time_io_secs[0] = ctl_time_io_secs >> 8; page->ctl_time_io_secs[1] = ctl_time_io_secs >> 0; break; default: #ifdef NEEDTOPORT EPRINT(0, "Invalid PC %d!!", pc); #endif /* NEEDTOPORT */ break; } return (0); } static int ctl_do_mode_select(union ctl_io *io) { struct scsi_mode_page_header *page_header; struct ctl_page_index *page_index; struct ctl_scsiio *ctsio; int control_dev, page_len; int page_len_offset, page_len_size; union ctl_modepage_info *modepage_info; struct ctl_lun *lun; int *len_left, *len_used; int retval, i; ctsio = &io->scsiio; page_index = NULL; page_len = 0; retval = CTL_RETVAL_COMPLETE; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if (lun->be_lun->lun_type != T_DIRECT) control_dev = 1; else control_dev = 0; modepage_info = (union ctl_modepage_info *) ctsio->io_hdr.ctl_private[CTL_PRIV_MODEPAGE].bytes; len_left = &modepage_info->header.len_left; len_used = &modepage_info->header.len_used; do_next_page: page_header = (struct scsi_mode_page_header *) (ctsio->kern_data_ptr + *len_used); if (*len_left == 0) { free(ctsio->kern_data_ptr, M_CTL); ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } else if (*len_left < sizeof(struct scsi_mode_page_header)) { free(ctsio->kern_data_ptr, M_CTL); ctl_set_param_len_error(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } else if ((page_header->page_code & SMPH_SPF) && (*len_left < sizeof(struct scsi_mode_page_header_sp))) { free(ctsio->kern_data_ptr, M_CTL); ctl_set_param_len_error(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * XXX KDM should we do something with the block descriptor? */ for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { if ((control_dev != 0) && (lun->mode_pages.index[i].page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; if ((lun->mode_pages.index[i].page_code & SMPH_PC_MASK) != (page_header->page_code & SMPH_PC_MASK)) continue; /* * If neither page has a subpage code, then we've got a * match. */ if (((lun->mode_pages.index[i].page_code & SMPH_SPF) == 0) && ((page_header->page_code & SMPH_SPF) == 0)) { page_index = &lun->mode_pages.index[i]; page_len = page_header->page_length; break; } /* * If both pages have subpages, then the subpage numbers * have to match.
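 *
 * (Editor's worked example: a subpage-format header for the control
 * page -- page_code 0x0a with SMPH_SPF set, subpage 0x01 -- matches
 * the control extension entry set up earlier and takes its 2-byte
 * page_length via scsi_2btoul(); the same page_code without SMPH_SPF
 * matches the plain control page through the branch above.)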
*/ if ((lun->mode_pages.index[i].page_code & SMPH_SPF) && (page_header->page_code & SMPH_SPF)) { struct scsi_mode_page_header_sp *sph; sph = (struct scsi_mode_page_header_sp *)page_header; if (lun->mode_pages.index[i].subpage == sph->subpage) { page_index = &lun->mode_pages.index[i]; page_len = scsi_2btoul(sph->page_length); break; } } } /* * If we couldn't find the page, or if we don't have a mode select * handler for it, send back an error to the user. */ if ((page_index == NULL) || (page_index->select_handler == NULL)) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ *len_used, /*bit_valid*/ 0, /*bit*/ 0); free(ctsio->kern_data_ptr, M_CTL); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if (page_index->page_code & SMPH_SPF) { page_len_offset = 2; page_len_size = 2; } else { page_len_size = 1; page_len_offset = 1; } /* * If the length the initiator gives us isn't the one we specify in * the mode page header, or if they didn't specify enough data in * the CDB to avoid truncating this page, kick out the request. */ if ((page_len != (page_index->page_len - page_len_offset - page_len_size)) || (*len_left < page_index->page_len)) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ *len_used + page_len_offset, /*bit_valid*/ 0, /*bit*/ 0); free(ctsio->kern_data_ptr, M_CTL); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Run through the mode page, checking to make sure that the bits * the user changed are actually legal for him to change. */ for (i = 0; i < page_index->page_len; i++) { uint8_t *user_byte, *change_mask, *current_byte; int bad_bit; int j; user_byte = (uint8_t *)page_header + i; change_mask = page_index->page_data + (page_index->page_len * CTL_PAGE_CHANGEABLE) + i; current_byte = page_index->page_data + (page_index->page_len * CTL_PAGE_CURRENT) + i; /* * Check to see whether the user set any bits in this byte * that he is not allowed to set. */ if ((*user_byte & ~(*change_mask)) == (*current_byte & ~(*change_mask))) continue; /* * Go through bit by bit to determine which one is illegal. */ bad_bit = 0; for (j = 7; j >= 0; j--) { if ((((1 << j) & ~(*change_mask)) & *user_byte) != (((1 << j) & ~(*change_mask)) & *current_byte)) { bad_bit = j; break; } } ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ *len_used + i, /*bit_valid*/ 1, /*bit*/ bad_bit); free(ctsio->kern_data_ptr, M_CTL); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Decrement these before we call the page handler, since we may * end up getting called back one way or another before the handler * returns to this context. */ *len_left -= page_index->page_len; *len_used += page_index->page_len; retval = page_index->select_handler(ctsio, page_index, (uint8_t *)page_header); /* * If the page handler returns CTL_RETVAL_QUEUED, then we need to * wait until this queued command completes to finish processing * the mode page. If it returns anything other than * CTL_RETVAL_COMPLETE (e.g. CTL_RETVAL_ERROR), then it should have * already set the sense information, freed the data pointer, and * completed the io for us. */ if (retval != CTL_RETVAL_COMPLETE) goto bailout_no_done; /* * If the initiator sent us more than one page, parse the next one.
*/ if (*len_left > 0) goto do_next_page; ctl_set_success(ctsio); free(ctsio->kern_data_ptr, M_CTL); ctl_done((union ctl_io *)ctsio); bailout_no_done: return (CTL_RETVAL_COMPLETE); } int ctl_mode_select(struct ctl_scsiio *ctsio) { int param_len, pf, sp; int header_size, bd_len; int len_left, len_used; struct ctl_page_index *page_index; struct ctl_lun *lun; int control_dev, page_len; union ctl_modepage_info *modepage_info; int retval; pf = 0; sp = 0; page_len = 0; len_used = 0; len_left = 0; retval = 0; bd_len = 0; page_index = NULL; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if (lun->be_lun->lun_type != T_DIRECT) control_dev = 1; else control_dev = 0; switch (ctsio->cdb[0]) { case MODE_SELECT_6: { struct scsi_mode_select_6 *cdb; cdb = (struct scsi_mode_select_6 *)ctsio->cdb; pf = (cdb->byte2 & SMS_PF) ? 1 : 0; sp = (cdb->byte2 & SMS_SP) ? 1 : 0; param_len = cdb->length; header_size = sizeof(struct scsi_mode_header_6); break; } case MODE_SELECT_10: { struct scsi_mode_select_10 *cdb; cdb = (struct scsi_mode_select_10 *)ctsio->cdb; pf = (cdb->byte2 & SMS_PF) ? 1 : 0; sp = (cdb->byte2 & SMS_SP) ? 1 : 0; param_len = scsi_2btoul(cdb->length); header_size = sizeof(struct scsi_mode_header_10); break; } default: ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); break; /* NOTREACHED */ } /* * From SPC-3: * "A parameter list length of zero indicates that the Data-Out Buffer * shall be empty. This condition shall not be considered as an error." */ if (param_len == 0) { ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Since we'll hit this the first time through, prior to * allocation, we don't need to free a data buffer here. */ if (param_len < header_size) { ctl_set_param_len_error(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Allocate the data buffer and grab the user's data. In theory, * we shouldn't have to sanity check the parameter list length here * because the maximum size is 64K. We should be able to malloc * that much without too many problems. */ if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { ctsio->kern_data_ptr = malloc(param_len, M_CTL, M_WAITOK); ctsio->kern_data_len = param_len; ctsio->kern_total_len = param_len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } switch (ctsio->cdb[0]) { case MODE_SELECT_6: { struct scsi_mode_header_6 *mh6; mh6 = (struct scsi_mode_header_6 *)ctsio->kern_data_ptr; bd_len = mh6->blk_desc_len; break; } case MODE_SELECT_10: { struct scsi_mode_header_10 *mh10; mh10 = (struct scsi_mode_header_10 *)ctsio->kern_data_ptr; bd_len = scsi_2btoul(mh10->blk_desc_len); break; } default: panic("Invalid CDB type %#x", ctsio->cdb[0]); break; } if (param_len < (header_size + bd_len)) { free(ctsio->kern_data_ptr, M_CTL); ctl_set_param_len_error(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Set the IO_CONT flag, so that if this I/O gets passed to * ctl_config_write_done(), it'll get passed back to * ctl_do_mode_select() for further processing, or completion if * we're all done. 
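 *
 * (Editor's arithmetic example, assuming the usual 8-byte
 * MODE SELECT(10) header: a 32-byte parameter list with one 8-byte
 * block descriptor leaves len_left = 32 - 8 - 8 = 16 bytes of mode
 * page data, and parsing starts at offset len_used = 16.)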
*/ ctsio->io_hdr.flags |= CTL_FLAG_IO_CONT; ctsio->io_cont = ctl_do_mode_select; modepage_info = (union ctl_modepage_info *) ctsio->io_hdr.ctl_private[CTL_PRIV_MODEPAGE].bytes; memset(modepage_info, 0, sizeof(*modepage_info)); len_left = param_len - header_size - bd_len; len_used = header_size + bd_len; modepage_info->header.len_left = len_left; modepage_info->header.len_used = len_used; return (ctl_do_mode_select((union ctl_io *)ctsio)); } int ctl_mode_sense(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; int pc, page_code, dbd, llba, subpage; int alloc_len, page_len, header_len, total_len; struct scsi_mode_block_descr *block_desc; struct ctl_page_index *page_index; int control_dev; dbd = 0; llba = 0; block_desc = NULL; page_index = NULL; CTL_DEBUG_PRINT(("ctl_mode_sense\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if (lun->be_lun->lun_type != T_DIRECT) control_dev = 1; else control_dev = 0; switch (ctsio->cdb[0]) { case MODE_SENSE_6: { struct scsi_mode_sense_6 *cdb; cdb = (struct scsi_mode_sense_6 *)ctsio->cdb; header_len = sizeof(struct scsi_mode_hdr_6); if (cdb->byte2 & SMS_DBD) dbd = 1; else header_len += sizeof(struct scsi_mode_block_descr); pc = (cdb->page & SMS_PAGE_CTRL_MASK) >> 6; page_code = cdb->page & SMS_PAGE_CODE; subpage = cdb->subpage; alloc_len = cdb->length; break; } case MODE_SENSE_10: { struct scsi_mode_sense_10 *cdb; cdb = (struct scsi_mode_sense_10 *)ctsio->cdb; header_len = sizeof(struct scsi_mode_hdr_10); if (cdb->byte2 & SMS_DBD) dbd = 1; else header_len += sizeof(struct scsi_mode_block_descr); if (cdb->byte2 & SMS10_LLBAA) llba = 1; pc = (cdb->page & SMS_PAGE_CTRL_MASK) >> 6; page_code = cdb->page & SMS_PAGE_CODE; subpage = cdb->subpage; alloc_len = scsi_2btoul(cdb->length); break; } default: ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); break; /* NOTREACHED */ } /* * We have to make a first pass through to calculate the size of * the pages that match the user's query. Then we allocate enough * memory to hold it, and actually copy the data into the buffer. */ switch (page_code) { case SMS_ALL_PAGES_PAGE: { int i; page_len = 0; /* * At the moment, values other than 0 and 0xff here are * reserved according to SPC-3. */ if ((subpage != SMS_SUBPAGE_PAGE_0) && (subpage != SMS_SUBPAGE_ALL)) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 3, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { if ((control_dev != 0) && (lun->mode_pages.index[i].page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; /* * We don't use this subpage if the user didn't * request all subpages. 
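 *
 * (For reference, the pc/page_code/subpage values steering this loop
 * decode from the CDB as in the sketch below; illustration only, not
 * part of this change. PC selects current, changeable, default or
 * saved values.)
 */
#if 0
/* Hypothetical decode of MODE SENSE(10) CDB bytes 2 and 3. */
static void
example_decode_mode_sense(struct scsi_mode_sense_10 *cdb)
{
	int pc, page_code, subpage;

	pc = (cdb->page & SMS_PAGE_CTRL_MASK) >> 6; /* 0..3 */
	page_code = cdb->page & SMS_PAGE_CODE;      /* 0x3f == all pages */
	subpage = cdb->subpage;                     /* 0xff == all subpages */
	printf("pc %d page %#x subpage %#x\n", pc, page_code, subpage);
}
#endif
/*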
*/ if ((lun->mode_pages.index[i].subpage != 0) && (subpage == SMS_SUBPAGE_PAGE_0)) continue; #if 0 printf("found page %#x len %d\n", lun->mode_pages.index[i].page_code & SMPH_PC_MASK, lun->mode_pages.index[i].page_len); #endif page_len += lun->mode_pages.index[i].page_len; } break; } default: { int i; page_len = 0; for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { /* Look for the right page code */ if ((lun->mode_pages.index[i].page_code & SMPH_PC_MASK) != page_code) continue; /* Look for the right subpage or the subpage wildcard*/ if ((lun->mode_pages.index[i].subpage != subpage) && (subpage != SMS_SUBPAGE_ALL)) continue; /* Make sure the page is supported for this dev type */ if ((control_dev != 0) && (lun->mode_pages.index[i].page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; #if 0 printf("found page %#x len %d\n", lun->mode_pages.index[i].page_code & SMPH_PC_MASK, lun->mode_pages.index[i].page_len); #endif page_len += lun->mode_pages.index[i].page_len; } if (page_len == 0) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 5); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } break; } } total_len = header_len + page_len; #if 0 printf("header_len = %d, page_len = %d, total_len = %d\n", header_len, page_len, total_len); #endif ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } switch (ctsio->cdb[0]) { case MODE_SENSE_6: { struct scsi_mode_hdr_6 *header; header = (struct scsi_mode_hdr_6 *)ctsio->kern_data_ptr; header->datalen = MIN(total_len - 1, 254); if (control_dev == 0) { header->dev_specific = 0x10; /* DPOFUA */ if ((lun->be_lun->flags & CTL_LUN_FLAG_READONLY) || (lun->mode_pages.control_page[CTL_PAGE_CURRENT] .eca_and_aen & SCP_SWP) != 0) header->dev_specific |= 0x80; /* WP */ } if (dbd) header->block_descr_len = 0; else header->block_descr_len = sizeof(struct scsi_mode_block_descr); block_desc = (struct scsi_mode_block_descr *)&header[1]; break; } case MODE_SENSE_10: { struct scsi_mode_hdr_10 *header; int datalen; header = (struct scsi_mode_hdr_10 *)ctsio->kern_data_ptr; datalen = MIN(total_len - 2, 65533); scsi_ulto2b(datalen, header->datalen); if (control_dev == 0) { header->dev_specific = 0x10; /* DPOFUA */ if ((lun->be_lun->flags & CTL_LUN_FLAG_READONLY) || (lun->mode_pages.control_page[CTL_PAGE_CURRENT] .eca_and_aen & SCP_SWP) != 0) header->dev_specific |= 0x80; /* WP */ } if (dbd) scsi_ulto2b(0, header->block_descr_len); else scsi_ulto2b(sizeof(struct scsi_mode_block_descr), header->block_descr_len); block_desc = (struct scsi_mode_block_descr *)&header[1]; break; } default: panic("invalid CDB type %#x", ctsio->cdb[0]); break; /* NOTREACHED */ } /* * If we've got a disk, use its blocksize in the block * descriptor. Otherwise, just set it to 0. 
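 *
 * (Independent of the block descriptor, note the residual arithmetic
 * above; it recurs throughout this file. As a sketch with a
 * hypothetical helper, not part of this change:)
 */
#if 0
/* Hypothetical: transfer min(total_len, alloc_len), record residual. */
static void
example_set_kern_len(struct ctl_scsiio *ctsio, int total_len, int alloc_len)
{
	if (total_len < alloc_len) {
		/* Less data than the initiator asked for. */
		ctsio->residual = alloc_len - total_len;
		ctsio->kern_data_len = total_len;
		ctsio->kern_total_len = total_len;
	} else {
		/* Clip to what the initiator will accept. */
		ctsio->residual = 0;
		ctsio->kern_data_len = alloc_len;
		ctsio->kern_total_len = alloc_len;
	}
}
#endif
/*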
*/ if (dbd == 0) { if (control_dev == 0) scsi_ulto3b(lun->be_lun->blocksize, block_desc->block_len); else scsi_ulto3b(0, block_desc->block_len); } switch (page_code) { case SMS_ALL_PAGES_PAGE: { int i, data_used; data_used = header_len; for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { struct ctl_page_index *page_index; page_index = &lun->mode_pages.index[i]; if ((control_dev != 0) && (page_index->page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; /* * We don't use this subpage if the user didn't * request all subpages. We already checked (above) * to make sure the user only specified a subpage * of 0 or 0xff in the SMS_ALL_PAGES_PAGE case. */ if ((page_index->subpage != 0) && (subpage == SMS_SUBPAGE_PAGE_0)) continue; /* * Call the handler, if it exists, to update the * page to the latest values. */ if (page_index->sense_handler != NULL) page_index->sense_handler(ctsio, page_index,pc); memcpy(ctsio->kern_data_ptr + data_used, page_index->page_data + (page_index->page_len * pc), page_index->page_len); data_used += page_index->page_len; } break; } default: { int i, data_used; data_used = header_len; for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { struct ctl_page_index *page_index; page_index = &lun->mode_pages.index[i]; /* Look for the right page code */ if ((page_index->page_code & SMPH_PC_MASK) != page_code) continue; /* Look for the right subpage or the subpage wildcard*/ if ((page_index->subpage != subpage) && (subpage != SMS_SUBPAGE_ALL)) continue; /* Make sure the page is supported for this dev type */ if ((control_dev != 0) && (page_index->page_flags & CTL_PAGE_FLAG_DISK_ONLY)) continue; /* * Call the handler, if it exists, to update the * page to the latest values. */ if (page_index->sense_handler != NULL) page_index->sense_handler(ctsio, page_index,pc); memcpy(ctsio->kern_data_ptr + data_used, page_index->page_data + (page_index->page_len * pc), page_index->page_len); data_used += page_index->page_len; } break; } } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_lbp_log_sense_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, int pc) { struct ctl_lun *lun; struct scsi_log_param_header *phdr; uint8_t *data; uint64_t val; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data = page_index->page_data; if (lun->backend->lun_attr != NULL && (val = lun->backend->lun_attr(lun->be_lun->be_lun, "blocksavail")) != UINT64_MAX) { phdr = (struct scsi_log_param_header *)data; scsi_ulto2b(0x0001, phdr->param_code); phdr->param_control = SLP_LBIN | SLP_LP; phdr->param_len = 8; data = (uint8_t *)(phdr + 1); scsi_ulto4b(val >> CTL_LBP_EXPONENT, data); data[4] = 0x02; /* per-pool */ data += phdr->param_len; } if (lun->backend->lun_attr != NULL && (val = lun->backend->lun_attr(lun->be_lun->be_lun, "blocksused")) != UINT64_MAX) { phdr = (struct scsi_log_param_header *)data; scsi_ulto2b(0x0002, phdr->param_code); phdr->param_control = SLP_LBIN | SLP_LP; phdr->param_len = 8; data = (uint8_t *)(phdr + 1); scsi_ulto4b(val >> CTL_LBP_EXPONENT, data); data[4] = 0x01; /* per-LUN */ data += phdr->param_len; } if (lun->backend->lun_attr != NULL && (val = lun->backend->lun_attr(lun->be_lun->be_lun, "poolblocksavail")) != UINT64_MAX) { phdr = (struct scsi_log_param_header *)data; scsi_ulto2b(0x00f1, phdr->param_code); phdr->param_control = SLP_LBIN | SLP_LP; phdr->param_len = 8; data = (uint8_t *)(phdr + 1); scsi_ulto4b(val >> CTL_LBP_EXPONENT, data); 
data[4] = 0x02; /* per-pool */ data += phdr->param_len; } if (lun->backend->lun_attr != NULL && (val = lun->backend->lun_attr(lun->be_lun->be_lun, "poolblocksused")) != UINT64_MAX) { phdr = (struct scsi_log_param_header *)data; scsi_ulto2b(0x00f2, phdr->param_code); phdr->param_control = SLP_LBIN | SLP_LP; phdr->param_len = 8; data = (uint8_t *)(phdr + 1); scsi_ulto4b(val >> CTL_LBP_EXPONENT, data); data[4] = 0x02; /* per-pool */ data += phdr->param_len; } page_index->page_len = data - page_index->page_data; return (0); } int ctl_sap_log_sense_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, int pc) { struct ctl_lun *lun; struct stat_page *data; uint64_t rn, wn, rb, wb; struct bintime rt, wt; int i; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data = (struct stat_page *)page_index->page_data; scsi_ulto2b(SLP_SAP, data->sap.hdr.param_code); data->sap.hdr.param_control = SLP_LBIN; data->sap.hdr.param_len = sizeof(struct scsi_log_stat_and_perf) - sizeof(struct scsi_log_param_header); rn = wn = rb = wb = 0; bintime_clear(&rt); bintime_clear(&wt); for (i = 0; i < CTL_MAX_PORTS; i++) { rn += lun->stats.ports[i].operations[CTL_STATS_READ]; wn += lun->stats.ports[i].operations[CTL_STATS_WRITE]; rb += lun->stats.ports[i].bytes[CTL_STATS_READ]; wb += lun->stats.ports[i].bytes[CTL_STATS_WRITE]; bintime_add(&rt, &lun->stats.ports[i].time[CTL_STATS_READ]); bintime_add(&wt, &lun->stats.ports[i].time[CTL_STATS_WRITE]); } scsi_u64to8b(rn, data->sap.read_num); scsi_u64to8b(wn, data->sap.write_num); if (lun->stats.blocksize > 0) { scsi_u64to8b(wb / lun->stats.blocksize, data->sap.recvieved_lba); scsi_u64to8b(rb / lun->stats.blocksize, data->sap.transmitted_lba); } scsi_u64to8b((uint64_t)rt.sec * 1000 + rt.frac / (UINT64_MAX / 1000), data->sap.read_int); scsi_u64to8b((uint64_t)wt.sec * 1000 + wt.frac / (UINT64_MAX / 1000), data->sap.write_int); scsi_u64to8b(0, data->sap.weighted_num); scsi_u64to8b(0, data->sap.weighted_int); scsi_ulto2b(SLP_IT, data->it.hdr.param_code); data->it.hdr.param_control = SLP_LBIN; data->it.hdr.param_len = sizeof(struct scsi_log_idle_time) - sizeof(struct scsi_log_param_header); #ifdef CTL_TIME_IO scsi_u64to8b(lun->idle_time / SBT_1MS, data->it.idle_int); #endif scsi_ulto2b(SLP_TI, data->ti.hdr.param_code); data->ti.hdr.param_control = SLP_LBIN; data->ti.hdr.param_len = sizeof(struct scsi_log_time_interval) - sizeof(struct scsi_log_param_header); scsi_ulto4b(3, data->ti.exponent); scsi_ulto4b(1, data->ti.integer); page_index->page_len = sizeof(*data); return (0); } int ctl_log_sense(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; int i, pc, page_code, subpage; int alloc_len, total_len; struct ctl_page_index *page_index; struct scsi_log_sense *cdb; struct scsi_log_header *header; CTL_DEBUG_PRINT(("ctl_log_sense\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_log_sense *)ctsio->cdb; pc = (cdb->page & SLS_PAGE_CTRL_MASK) >> 6; page_code = cdb->page & SLS_PAGE_CODE; subpage = cdb->subpage; alloc_len = scsi_2btoul(cdb->length); page_index = NULL; for (i = 0; i < CTL_NUM_LOG_PAGES; i++) { page_index = &lun->log_pages.index[i]; /* Look for the right page code */ if ((page_index->page_code & SL_PAGE_CODE) != page_code) continue; /* Look for the right subpage or the subpage wildcard*/ if (page_index->subpage != subpage) continue; break; } if (i >= CTL_NUM_LOG_PAGES) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio);
return (CTL_RETVAL_COMPLETE); } total_len = sizeof(struct scsi_log_header) + page_index->page_len; ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } header = (struct scsi_log_header *)ctsio->kern_data_ptr; header->page = page_index->page_code; if (page_index->subpage) { header->page |= SL_SPF; header->subpage = page_index->subpage; } scsi_ulto2b(page_index->page_len, header->datalen); /* * Call the handler, if it exists, to update the * page to the latest values. */ if (page_index->sense_handler != NULL) page_index->sense_handler(ctsio, page_index, pc); memcpy(header + 1, page_index->page_data, page_index->page_len); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_read_capacity(struct ctl_scsiio *ctsio) { struct scsi_read_capacity *cdb; struct scsi_read_capacity_data *data; struct ctl_lun *lun; uint32_t lba; CTL_DEBUG_PRINT(("ctl_read_capacity\n")); cdb = (struct scsi_read_capacity *)ctsio->cdb; lba = scsi_4btoul(cdb->addr); if (((cdb->pmi & SRC_PMI) == 0) && (lba != 0)) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; ctsio->kern_data_ptr = malloc(sizeof(*data), M_CTL, M_WAITOK | M_ZERO); data = (struct scsi_read_capacity_data *)ctsio->kern_data_ptr; ctsio->residual = 0; ctsio->kern_data_len = sizeof(*data); ctsio->kern_total_len = sizeof(*data); ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * If the maximum LBA is greater than 0xfffffffe, the user must * issue a SERVICE ACTION IN (16) command, with the read capacity * service action set. */ if (lun->be_lun->maxlba > 0xfffffffe) scsi_ulto4b(0xffffffff, data->addr); else scsi_ulto4b(lun->be_lun->maxlba, data->addr); /* * XXX KDM this may not be 512 bytes...
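 *
 * (More to the point for initiators, the all-ones address stored above
 * is a sentinel; a consumer-side sketch, illustration only, not part
 * of this change:)
 */
#if 0
/* Hypothetical initiator check after READ CAPACITY(10) completes. */
static int
example_need_read_capacity_16(struct scsi_read_capacity_data *data)
{
	/* A max LBA of all ones directs us to SERVICE ACTION IN(16). */
	return (scsi_4btoul(data->addr) == 0xffffffff);
}
#endif
/*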
*/ scsi_ulto4b(lun->be_lun->blocksize, data->length); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_read_capacity_16(struct ctl_scsiio *ctsio) { struct scsi_read_capacity_16 *cdb; struct scsi_read_capacity_data_long *data; struct ctl_lun *lun; uint64_t lba; uint32_t alloc_len; CTL_DEBUG_PRINT(("ctl_read_capacity_16\n")); cdb = (struct scsi_read_capacity_16 *)ctsio->cdb; alloc_len = scsi_4btoul(cdb->alloc_len); lba = scsi_8btou64(cdb->addr); if ((cdb->reladr & SRC16_PMI) && (lba != 0)) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; ctsio->kern_data_ptr = malloc(sizeof(*data), M_CTL, M_WAITOK | M_ZERO); data = (struct scsi_read_capacity_data_long *)ctsio->kern_data_ptr; if (sizeof(*data) < alloc_len) { ctsio->residual = alloc_len - sizeof(*data); ctsio->kern_data_len = sizeof(*data); ctsio->kern_total_len = sizeof(*data); } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; scsi_u64to8b(lun->be_lun->maxlba, data->addr); /* XXX KDM this may not be 512 bytes... */ scsi_ulto4b(lun->be_lun->blocksize, data->length); data->prot_lbppbe = lun->be_lun->pblockexp & SRC16_LBPPBE; scsi_ulto2b(lun->be_lun->pblockoff & SRC16_LALBA_A, data->lalba_lbp); if (lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) data->lalba_lbp[0] |= SRC16_LBPME | SRC16_LBPRZ; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_get_lba_status(struct ctl_scsiio *ctsio) { struct scsi_get_lba_status *cdb; struct scsi_get_lba_status_data *data; struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; uint64_t lba; uint32_t alloc_len, total_len; int retval; CTL_DEBUG_PRINT(("ctl_get_lba_status\n")); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_get_lba_status *)ctsio->cdb; lba = scsi_8btou64(cdb->addr); alloc_len = scsi_4btoul(cdb->alloc_len); if (lba > lun->be_lun->maxlba) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } total_len = sizeof(*data) + sizeof(data->descr[0]); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); data = (struct scsi_get_lba_status_data *)ctsio->kern_data_ptr; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* Fill dummy data in case backend can't tell anything. */ scsi_ulto4b(4 + sizeof(data->descr[0]), data->length); scsi_u64to8b(lba, data->descr[0].addr); scsi_ulto4b(MIN(UINT32_MAX, lun->be_lun->maxlba + 1 - lba), data->descr[0].length); data->descr[0].status = 0; /* Mapped or unknown. 
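 *
 * SBC-3 defines the provisioning status as 0 for mapped, 1 for
 * deallocated and 2 for anchored; a consumer-side sketch (illustration
 * only, not part of this change):
 */
#if 0
/* Hypothetical decode of the first GET LBA STATUS descriptor. */
static void
example_print_lba_status(struct scsi_get_lba_status_data *data)
{
	printf("lba %ju len %u status %d\n",
	    (uintmax_t)scsi_8btou64(data->descr[0].addr),
	    scsi_4btoul(data->descr[0].length),
	    data->descr[0].status & 0x0f);
}
#endif
/*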
*/ ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; lbalen = (struct ctl_lba_len_flags *)&ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = lba; lbalen->len = total_len; lbalen->flags = 0; retval = lun->backend->config_read((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_read_defect(struct ctl_scsiio *ctsio) { struct scsi_read_defect_data_10 *ccb10; struct scsi_read_defect_data_12 *ccb12; struct scsi_read_defect_data_hdr_10 *data10; struct scsi_read_defect_data_hdr_12 *data12; uint32_t alloc_len, data_len; uint8_t format; CTL_DEBUG_PRINT(("ctl_read_defect\n")); if (ctsio->cdb[0] == READ_DEFECT_DATA_10) { ccb10 = (struct scsi_read_defect_data_10 *)&ctsio->cdb; format = ccb10->format; alloc_len = scsi_2btoul(ccb10->alloc_length); data_len = sizeof(*data10); } else { ccb12 = (struct scsi_read_defect_data_12 *)&ctsio->cdb; format = ccb12->format; alloc_len = scsi_4btoul(ccb12->alloc_length); data_len = sizeof(*data12); } if (alloc_len == 0) { ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; if (ctsio->cdb[0] == READ_DEFECT_DATA_10) { data10 = (struct scsi_read_defect_data_hdr_10 *) ctsio->kern_data_ptr; data10->format = format; scsi_ulto2b(0, data10->length); } else { data12 = (struct scsi_read_defect_data_hdr_12 *) ctsio->kern_data_ptr; data12->format = format; scsi_ulto2b(0, data12->generation); scsi_ulto4b(0, data12->length); } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_report_tagret_port_groups(struct ctl_scsiio *ctsio) { struct scsi_maintenance_in *cdb; int retval; int alloc_len, ext, total_len = 0, g, pc, pg, gs, os; int num_target_port_groups, num_target_ports; struct ctl_lun *lun; struct ctl_softc *softc; struct ctl_port *port; struct scsi_target_group_data *rtg_ptr; struct scsi_target_group_data_extended *rtg_ext_ptr; struct scsi_target_port_group_descriptor *tpg_desc; CTL_DEBUG_PRINT(("ctl_report_tagret_port_groups\n")); cdb = (struct scsi_maintenance_in *)ctsio->cdb; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; softc = lun->ctl_softc; retval = CTL_RETVAL_COMPLETE; switch (cdb->byte2 & STG_PDF_MASK) { case STG_PDF_LENGTH: ext = 0; break; case STG_PDF_EXTENDED: ext = 1; break; default: ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 5); ctl_done((union ctl_io *)ctsio); return(retval); } if (softc->is_single) num_target_port_groups = 1; else num_target_port_groups = NUM_TARGET_PORT_GROUPS; num_target_ports = 0; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { if ((port->status & CTL_PORT_STATUS_ONLINE) == 0) continue; if (ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS) continue; num_target_ports++; } mtx_unlock(&softc->ctl_lock); if (ext) total_len = sizeof(struct scsi_target_group_data_extended); else total_len = sizeof(struct scsi_target_group_data); total_len += sizeof(struct scsi_target_port_group_descriptor) * 
num_target_port_groups + sizeof(struct scsi_target_port_descriptor) * num_target_ports; alloc_len = scsi_4btoul(cdb->length); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; if (ext) { rtg_ext_ptr = (struct scsi_target_group_data_extended *) ctsio->kern_data_ptr; scsi_ulto4b(total_len - 4, rtg_ext_ptr->length); rtg_ext_ptr->format_type = 0x10; rtg_ext_ptr->implicit_transition_time = 0; tpg_desc = &rtg_ext_ptr->groups[0]; } else { rtg_ptr = (struct scsi_target_group_data *) ctsio->kern_data_ptr; scsi_ulto4b(total_len - 4, rtg_ptr->length); tpg_desc = &rtg_ptr->groups[0]; } mtx_lock(&softc->ctl_lock); pg = softc->port_min / softc->port_cnt; if (softc->ha_link == CTL_HA_LINK_OFFLINE) gs = TPG_ASYMMETRIC_ACCESS_UNAVAILABLE; else if (softc->ha_link == CTL_HA_LINK_UNKNOWN) gs = TPG_ASYMMETRIC_ACCESS_TRANSITIONING; else if (softc->ha_mode == CTL_HA_MODE_ACT_STBY) gs = TPG_ASYMMETRIC_ACCESS_STANDBY; else gs = TPG_ASYMMETRIC_ACCESS_NONOPTIMIZED; if (lun->flags & CTL_LUN_PRIMARY_SC) { os = gs; gs = TPG_ASYMMETRIC_ACCESS_OPTIMIZED; } else os = TPG_ASYMMETRIC_ACCESS_OPTIMIZED; for (g = 0; g < num_target_port_groups; g++) { tpg_desc->pref_state = (g == pg) ? gs : os; tpg_desc->support = TPG_AO_SUP | TPG_AN_SUP | TPG_S_SUP | TPG_U_SUP | TPG_T_SUP; scsi_ulto2b(g + 1, tpg_desc->target_port_group); tpg_desc->status = TPG_IMPLICIT; pc = 0; STAILQ_FOREACH(port, &softc->port_list, links) { if (port->targ_port < g * softc->port_cnt || port->targ_port >= (g + 1) * softc->port_cnt) continue; if ((port->status & CTL_PORT_STATUS_ONLINE) == 0) continue; if (ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS) continue; scsi_ulto2b(port->targ_port, tpg_desc->descriptors[pc]. 
relative_target_port_identifier); pc++; } tpg_desc->target_port_count = pc; tpg_desc = (struct scsi_target_port_group_descriptor *) &tpg_desc->descriptors[pc]; } mtx_unlock(&softc->ctl_lock); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return(retval); } int ctl_report_supported_opcodes(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct scsi_report_supported_opcodes *cdb; const struct ctl_cmd_entry *entry, *sentry; struct scsi_report_supported_opcodes_all *all; struct scsi_report_supported_opcodes_descr *descr; struct scsi_report_supported_opcodes_one *one; int retval; int alloc_len, total_len; int opcode, service_action, i, j, num; CTL_DEBUG_PRINT(("ctl_report_supported_opcodes\n")); cdb = (struct scsi_report_supported_opcodes *)ctsio->cdb; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; retval = CTL_RETVAL_COMPLETE; opcode = cdb->requested_opcode; service_action = scsi_2btoul(cdb->requested_service_action); switch (cdb->options & RSO_OPTIONS_MASK) { case RSO_OPTIONS_ALL: num = 0; for (i = 0; i < 256; i++) { entry = &ctl_cmd_table[i]; if (entry->flags & CTL_CMD_FLAG_SA5) { for (j = 0; j < 32; j++) { sentry = &((const struct ctl_cmd_entry *) entry->execute)[j]; if (ctl_cmd_applicable( lun->be_lun->lun_type, sentry)) num++; } } else { if (ctl_cmd_applicable(lun->be_lun->lun_type, entry)) num++; } } total_len = sizeof(struct scsi_report_supported_opcodes_all) + num * sizeof(struct scsi_report_supported_opcodes_descr); break; case RSO_OPTIONS_OC: if (ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 2); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32; break; case RSO_OPTIONS_OC_SA: if ((ctl_cmd_table[opcode].flags & CTL_CMD_FLAG_SA5) == 0 || service_action >= 32) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 2); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } total_len = sizeof(struct scsi_report_supported_opcodes_one) + 32; break; default: ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 2); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } alloc_len = scsi_4btoul(cdb->length); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; switch (cdb->options & RSO_OPTIONS_MASK) { case RSO_OPTIONS_ALL: all = (struct scsi_report_supported_opcodes_all *) ctsio->kern_data_ptr; num = 0; for (i = 0; i < 256; i++) { entry = &ctl_cmd_table[i]; if (entry->flags & CTL_CMD_FLAG_SA5) { for (j = 0; j < 32; j++) { sentry = &((const struct ctl_cmd_entry *) entry->execute)[j]; if (!ctl_cmd_applicable( lun->be_lun->lun_type, sentry)) continue; descr = &all->descr[num++]; descr->opcode = i; scsi_ulto2b(j, descr->service_action); descr->flags = RSO_SERVACTV; scsi_ulto2b(sentry->length, descr->cdb_length); } } else { if (!ctl_cmd_applicable(lun->be_lun->lun_type, entry)) continue; descr = &all->descr[num++]; 
descr->opcode = i; scsi_ulto2b(0, descr->service_action); descr->flags = 0; scsi_ulto2b(entry->length, descr->cdb_length); } } scsi_ulto4b( num * sizeof(struct scsi_report_supported_opcodes_descr), all->length); break; case RSO_OPTIONS_OC: one = (struct scsi_report_supported_opcodes_one *) ctsio->kern_data_ptr; entry = &ctl_cmd_table[opcode]; goto fill_one; case RSO_OPTIONS_OC_SA: one = (struct scsi_report_supported_opcodes_one *) ctsio->kern_data_ptr; entry = &ctl_cmd_table[opcode]; entry = &((const struct ctl_cmd_entry *) entry->execute)[service_action]; fill_one: if (ctl_cmd_applicable(lun->be_lun->lun_type, entry)) { one->support = 3; scsi_ulto2b(entry->length, one->cdb_length); one->cdb_usage[0] = opcode; memcpy(&one->cdb_usage[1], entry->usage, entry->length - 1); } else one->support = 1; break; } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return(retval); } int ctl_report_supported_tmf(struct ctl_scsiio *ctsio) { struct scsi_report_supported_tmf *cdb; struct scsi_report_supported_tmf_data *data; int retval; int alloc_len, total_len; CTL_DEBUG_PRINT(("ctl_report_supported_tmf\n")); cdb = (struct scsi_report_supported_tmf *)ctsio->cdb; retval = CTL_RETVAL_COMPLETE; total_len = sizeof(struct scsi_report_supported_tmf_data); alloc_len = scsi_4btoul(cdb->length); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; data = (struct scsi_report_supported_tmf_data *)ctsio->kern_data_ptr; data->byte1 |= RST_ATS | RST_ATSS | RST_CTSS | RST_LURS | RST_QTS | RST_TRS; data->byte2 |= RST_QAES | RST_QTSS | RST_ITNRS; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (retval); } int ctl_report_timestamp(struct ctl_scsiio *ctsio) { struct scsi_report_timestamp *cdb; struct scsi_report_timestamp_data *data; struct timeval tv; int64_t timestamp; int retval; int alloc_len, total_len; CTL_DEBUG_PRINT(("ctl_report_timestamp\n")); cdb = (struct scsi_report_timestamp *)ctsio->cdb; retval = CTL_RETVAL_COMPLETE; total_len = sizeof(struct scsi_report_timestamp_data); alloc_len = scsi_4btoul(cdb->length); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); ctsio->kern_sg_entries = 0; if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; data = (struct scsi_report_timestamp_data *)ctsio->kern_data_ptr; scsi_ulto2b(sizeof(*data) - 2, data->length); data->origin = RTS_ORIG_OUTSIDE; getmicrotime(&tv); timestamp = (int64_t)tv.tv_sec * 1000 + tv.tv_usec / 1000; scsi_ulto4b(timestamp >> 16, data->timestamp); scsi_ulto2b(timestamp & 0xffff, &data->timestamp[4]); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (retval); } int ctl_persistent_reserve_in(struct ctl_scsiio *ctsio) { struct scsi_per_res_in *cdb; int alloc_len, 
total_len = 0; /* struct scsi_per_res_in_rsrv in_data; */ struct ctl_lun *lun; struct ctl_softc *softc; uint64_t key; CTL_DEBUG_PRINT(("ctl_persistent_reserve_in\n")); cdb = (struct scsi_per_res_in *)ctsio->cdb; alloc_len = scsi_2btoul(cdb->length); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; softc = lun->ctl_softc; retry: mtx_lock(&lun->lun_lock); switch (cdb->action) { case SPRI_RK: /* read keys */ total_len = sizeof(struct scsi_per_res_in_keys) + lun->pr_key_count * sizeof(struct scsi_per_res_key); break; case SPRI_RR: /* read reservation */ if (lun->flags & CTL_LUN_PR_RESERVED) total_len = sizeof(struct scsi_per_res_in_rsrv); else total_len = sizeof(struct scsi_per_res_in_header); break; case SPRI_RC: /* report capabilities */ total_len = sizeof(struct scsi_per_res_cap); break; case SPRI_RS: /* read full status */ total_len = sizeof(struct scsi_per_res_in_header) + (sizeof(struct scsi_per_res_in_full_desc) + 256) * lun->pr_key_count; break; default: panic("Invalid PR type %x", cdb->action); } mtx_unlock(&lun->lun_lock); ctsio->kern_data_ptr = malloc(total_len, M_CTL, M_WAITOK | M_ZERO); if (total_len < alloc_len) { ctsio->residual = alloc_len - total_len; ctsio->kern_data_len = total_len; ctsio->kern_total_len = total_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; mtx_lock(&lun->lun_lock); switch (cdb->action) { case SPRI_RK: { // read keys struct scsi_per_res_in_keys *res_keys; int i, key_count; res_keys = (struct scsi_per_res_in_keys*)ctsio->kern_data_ptr; /* * We had to drop the lock to allocate our buffer, which * leaves time for someone to come in with another * persistent reservation. (That is unlikely, though, * since this should be the only persistent reservation * command active right now.) */ if (total_len != (sizeof(struct scsi_per_res_in_keys) + (lun->pr_key_count * sizeof(struct scsi_per_res_key)))){ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); printf("%s: reservation length changed, retrying\n", __func__); goto retry; } scsi_ulto4b(lun->PRGeneration, res_keys->header.generation); scsi_ulto4b(sizeof(struct scsi_per_res_key) * lun->pr_key_count, res_keys->header.length); for (i = 0, key_count = 0; i < CTL_MAX_INITIATORS; i++) { if ((key = ctl_get_prkey(lun, i)) == 0) continue; /* * We used lun->pr_key_count to calculate the * size to allocate. If it turns out the number of * initiators with the registered flag set is * larger than that (i.e. they haven't been kept in * sync), we've got a problem. 
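 *
 * (For orientation, the payload being assembled is just the
 * generation/length header followed by packed 8-byte keys; an
 * initiator-side sketch, illustration only, not part of this change:)
 */
#if 0
/* Hypothetical parse of a PERSISTENT RESERVE IN / READ KEYS payload. */
static void
example_parse_read_keys(struct scsi_per_res_in_keys *res_keys)
{
	int i, nkeys;

	nkeys = scsi_4btoul(res_keys->header.length) /
	    sizeof(struct scsi_per_res_key);
	for (i = 0; i < nkeys; i++)
		printf("key[%d] = %#jx\n", i,
		    (uintmax_t)scsi_8btou64(res_keys->keys[i].key));
}
#endif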
*/ if (key_count >= lun->pr_key_count) { #ifdef NEEDTOPORT csevent_log(CSC_CTL | CSC_SHELF_SW | CTL_PR_ERROR, csevent_LogType_Fault, csevent_AlertLevel_Yellow, csevent_FRU_ShelfController, csevent_FRU_Firmware, csevent_FRU_Unknown, "registered keys %d >= key " "count %d", key_count, lun->pr_key_count); #endif key_count++; continue; } scsi_u64to8b(key, res_keys->keys[key_count].key); key_count++; } break; } case SPRI_RR: { // read reservation struct scsi_per_res_in_rsrv *res; int tmp_len, header_only; res = (struct scsi_per_res_in_rsrv *)ctsio->kern_data_ptr; scsi_ulto4b(lun->PRGeneration, res->header.generation); if (lun->flags & CTL_LUN_PR_RESERVED) { tmp_len = sizeof(struct scsi_per_res_in_rsrv); scsi_ulto4b(sizeof(struct scsi_per_res_in_rsrv_data), res->header.length); header_only = 0; } else { tmp_len = sizeof(struct scsi_per_res_in_header); scsi_ulto4b(0, res->header.length); header_only = 1; } /* * We had to drop the lock to allocate our buffer, which * leaves time for someone to come in with another * persistent reservation. (That is unlikely, though, * since this should be the only persistent reservation * command active right now.) */ if (tmp_len != total_len) { mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); printf("%s: reservation status changed, retrying\n", __func__); goto retry; } /* * No reservation held, so we're done. */ if (header_only != 0) break; /* * If the registration is an All Registrants type, the key * is 0, since it doesn't really matter. */ if (lun->pr_res_idx != CTL_PR_ALL_REGISTRANTS) { scsi_u64to8b(ctl_get_prkey(lun, lun->pr_res_idx), res->data.reservation); } res->data.scopetype = lun->res_type; break; } case SPRI_RC: //report capabilities { struct scsi_per_res_cap *res_cap; uint16_t type_mask; res_cap = (struct scsi_per_res_cap *)ctsio->kern_data_ptr; scsi_ulto2b(sizeof(*res_cap), res_cap->length); res_cap->flags2 |= SPRI_TMV | SPRI_ALLOW_5; type_mask = SPRI_TM_WR_EX_AR | SPRI_TM_EX_AC_RO | SPRI_TM_WR_EX_RO | SPRI_TM_EX_AC | SPRI_TM_WR_EX | SPRI_TM_EX_AC_AR; scsi_ulto2b(type_mask, res_cap->type_mask); break; } case SPRI_RS: { // read full status struct scsi_per_res_in_full *res_status; struct scsi_per_res_in_full_desc *res_desc; struct ctl_port *port; int i, len; res_status = (struct scsi_per_res_in_full*)ctsio->kern_data_ptr; /* * We had to drop the lock to allocate our buffer, which * leaves time for someone to come in with another * persistent reservation. (That is unlikely, though, * since this should be the only persistent reservation * command active right now.) 
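 *
 * (All of the retry sites in this function follow the same
 * drop-allocate-recheck pattern; as a sketch with a hypothetical
 * sizing helper, not part of this change:)
 */
#if 0
/* Hypothetical allocate-then-revalidate loop; returns with lun_lock held. */
static void *
example_alloc_pr_buffer(struct ctl_lun *lun, int *lenp)
{
	void *buf;
	int len;

	for (;;) {
		mtx_lock(&lun->lun_lock);
		len = example_compute_len(lun);	/* depends on lun state */
		mtx_unlock(&lun->lun_lock);
		buf = malloc(len, M_CTL, M_WAITOK | M_ZERO);
		mtx_lock(&lun->lun_lock);
		if (len == example_compute_len(lun))
			break;		/* state unchanged, buffer fits */
		mtx_unlock(&lun->lun_lock);
		free(buf, M_CTL);	/* state moved, size it again */
	}
	*lenp = len;
	return (buf);
}
#endif
/*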
*/ if (total_len < (sizeof(struct scsi_per_res_in_header) + (sizeof(struct scsi_per_res_in_full_desc) + 256) * lun->pr_key_count)){ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); printf("%s: reservation length changed, retrying\n", __func__); goto retry; } scsi_ulto4b(lun->PRGeneration, res_status->header.generation); res_desc = &res_status->desc[0]; for (i = 0; i < CTL_MAX_INITIATORS; i++) { if ((key = ctl_get_prkey(lun, i)) == 0) continue; scsi_u64to8b(key, res_desc->res_key.key); if ((lun->flags & CTL_LUN_PR_RESERVED) && (lun->pr_res_idx == i || lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS)) { res_desc->flags = SPRI_FULL_R_HOLDER; res_desc->scopetype = lun->res_type; } scsi_ulto2b(i / CTL_MAX_INIT_PER_PORT, res_desc->rel_trgt_port_id); len = 0; port = softc->ctl_ports[i / CTL_MAX_INIT_PER_PORT]; if (port != NULL) len = ctl_create_iid(port, i % CTL_MAX_INIT_PER_PORT, res_desc->transport_id); scsi_ulto4b(len, res_desc->additional_length); res_desc = (struct scsi_per_res_in_full_desc *) &res_desc->transport_id[len]; } scsi_ulto4b((uint8_t *)res_desc - (uint8_t *)&res_status->desc[0], res_status->header.length); break; } default: /* * This is a bug, because we just checked for this above, * and should have returned an error. */ panic("Invalid PR type %x", cdb->action); break; /* NOTREACHED */ } mtx_unlock(&lun->lun_lock); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * Returns 0 if ctl_persistent_reserve_out() should continue, non-zero if * it should return. */ static int ctl_pro_preempt(struct ctl_softc *softc, struct ctl_lun *lun, uint64_t res_key, uint64_t sa_res_key, uint8_t type, uint32_t residx, struct ctl_scsiio *ctsio, struct scsi_per_res_out *cdb, struct scsi_per_res_out_parms* param) { union ctl_ha_msg persis_io; int i; mtx_lock(&lun->lun_lock); if (sa_res_key == 0) { if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS) { /* validate scope and type */ if ((cdb->scope_type & SPR_SCOPE_MASK) != SPR_LU_SCOPE) { mtx_unlock(&lun->lun_lock); ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 4); ctl_done((union ctl_io *)ctsio); return (1); } if (type>8 || type==2 || type==4 || type==0) { mtx_unlock(&lun->lun_lock); ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (1); } /* * Unregister everybody else and build UA for * them */ for(i = 0; i < CTL_MAX_INITIATORS; i++) { if (i == residx || ctl_get_prkey(lun, i) == 0) continue; ctl_clr_prkey(lun, i); ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT); } lun->pr_key_count = 1; lun->res_type = type; if (lun->res_type != SPR_TYPE_WR_EX_AR && lun->res_type != SPR_TYPE_EX_AC_AR) lun->pr_res_idx = residx; lun->PRGeneration++; mtx_unlock(&lun->lun_lock); /* send msg to other side */ persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_PREEMPT; persis_io.pr.pr_info.residx = lun->pr_res_idx; persis_io.pr.pr_info.res_type = type; memcpy(persis_io.pr.pr_info.sa_res_key, param->serv_act_res_key, sizeof(param->serv_act_res_key)); ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io.pr), M_WAITOK); } else { /* not all registrants */ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ 8, /*bit_valid*/ 0, 
/*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (1); } } else if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS || !(lun->flags & CTL_LUN_PR_RESERVED)) { int found = 0; if (res_key == sa_res_key) { /* special case */ /* * The spec implies this is not good but doesn't * say what to do. There are two choices either * generate a res conflict or check condition * with illegal field in parameter data. Since * that is what is done when the sa_res_key is * zero I'll take that approach since this has * to do with the sa_res_key. */ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ 8, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (1); } for (i = 0; i < CTL_MAX_INITIATORS; i++) { if (ctl_get_prkey(lun, i) != sa_res_key) continue; found = 1; ctl_clr_prkey(lun, i); lun->pr_key_count--; ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT); } if (!found) { mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } lun->PRGeneration++; mtx_unlock(&lun->lun_lock); /* send msg to other side */ persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_PREEMPT; persis_io.pr.pr_info.residx = lun->pr_res_idx; persis_io.pr.pr_info.res_type = type; memcpy(persis_io.pr.pr_info.sa_res_key, param->serv_act_res_key, sizeof(param->serv_act_res_key)); ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io.pr), M_WAITOK); } else { /* Reserved but not all registrants */ /* sa_res_key is res holder */ if (sa_res_key == ctl_get_prkey(lun, lun->pr_res_idx)) { /* validate scope and type */ if ((cdb->scope_type & SPR_SCOPE_MASK) != SPR_LU_SCOPE) { mtx_unlock(&lun->lun_lock); ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 4); ctl_done((union ctl_io *)ctsio); return (1); } if (type>8 || type==2 || type==4 || type==0) { mtx_unlock(&lun->lun_lock); ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (1); } /* * Do the following: * if sa_res_key != res_key remove all * registrants w/sa_res_key and generate UA * for these registrants(Registrations * Preempted) if it wasn't an exclusive * reservation generate UA(Reservations * Preempted) for all other registered nexuses * if the type has changed. Establish the new * reservation and holder. If res_key and * sa_res_key are the same do the above * except don't unregister the res holder. 
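 *
 * (The scope/type validation above rejects the type codes SPC-3 marks
 * reserved; written out as a sketch, illustration only, not part of
 * this change:)
 */
#if 0
/* Hypothetical explicit form of the "type>8 || type==2 || ..." check. */
static int
example_valid_pr_type(uint8_t type)
{
	switch (type) {
	case SPR_TYPE_WR_EX:	/* 1: Write Exclusive */
	case SPR_TYPE_EX_AC:	/* 3: Exclusive Access */
	case SPR_TYPE_WR_EX_RO:	/* 5: Write Exclusive, Registrants Only */
	case SPR_TYPE_EX_AC_RO:	/* 6: Exclusive Access, Registrants Only */
	case SPR_TYPE_WR_EX_AR:	/* 7: Write Exclusive, All Registrants */
	case SPR_TYPE_EX_AC_AR:	/* 8: Exclusive Access, All Registrants */
		return (1);
	default:		/* 0, 2, 4 and anything above 8 */
		return (0);
	}
}
#endif
/*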
*/ for(i = 0; i < CTL_MAX_INITIATORS; i++) { if (i == residx || ctl_get_prkey(lun, i) == 0) continue; if (sa_res_key == ctl_get_prkey(lun, i)) { ctl_clr_prkey(lun, i); lun->pr_key_count--; ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT); } else if (type != lun->res_type && (lun->res_type == SPR_TYPE_WR_EX_RO || lun->res_type ==SPR_TYPE_EX_AC_RO)){ ctl_est_ua(lun, i, CTL_UA_RES_RELEASE); } } lun->res_type = type; if (lun->res_type != SPR_TYPE_WR_EX_AR && lun->res_type != SPR_TYPE_EX_AC_AR) lun->pr_res_idx = residx; else lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS; lun->PRGeneration++; mtx_unlock(&lun->lun_lock); persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_PREEMPT; persis_io.pr.pr_info.residx = lun->pr_res_idx; persis_io.pr.pr_info.res_type = type; memcpy(persis_io.pr.pr_info.sa_res_key, param->serv_act_res_key, sizeof(param->serv_act_res_key)); ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io.pr), M_WAITOK); } else { /* * sa_res_key is not the res holder just * remove registrants */ int found=0; for (i = 0; i < CTL_MAX_INITIATORS; i++) { if (sa_res_key != ctl_get_prkey(lun, i)) continue; found = 1; ctl_clr_prkey(lun, i); lun->pr_key_count--; ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT); } if (!found) { mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (1); } lun->PRGeneration++; mtx_unlock(&lun->lun_lock); persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_PREEMPT; persis_io.pr.pr_info.residx = lun->pr_res_idx; persis_io.pr.pr_info.res_type = type; memcpy(persis_io.pr.pr_info.sa_res_key, param->serv_act_res_key, sizeof(param->serv_act_res_key)); ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io.pr), M_WAITOK); } } return (0); } static void ctl_pro_preempt_other(struct ctl_lun *lun, union ctl_ha_msg *msg) { uint64_t sa_res_key; int i; sa_res_key = scsi_8btou64(msg->pr.pr_info.sa_res_key); if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS || lun->pr_res_idx == CTL_PR_NO_RESERVATION || sa_res_key != ctl_get_prkey(lun, lun->pr_res_idx)) { if (sa_res_key == 0) { /* * Unregister everybody else and build UA for * them */ for(i = 0; i < CTL_MAX_INITIATORS; i++) { if (i == msg->pr.pr_info.residx || ctl_get_prkey(lun, i) == 0) continue; ctl_clr_prkey(lun, i); ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT); } lun->pr_key_count = 1; lun->res_type = msg->pr.pr_info.res_type; if (lun->res_type != SPR_TYPE_WR_EX_AR && lun->res_type != SPR_TYPE_EX_AC_AR) lun->pr_res_idx = msg->pr.pr_info.residx; } else { for (i = 0; i < CTL_MAX_INITIATORS; i++) { if (sa_res_key == ctl_get_prkey(lun, i)) continue; ctl_clr_prkey(lun, i); lun->pr_key_count--; ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT); } } } else { for (i = 0; i < CTL_MAX_INITIATORS; i++) { if (i == msg->pr.pr_info.residx || ctl_get_prkey(lun, i) == 0) continue; if (sa_res_key == ctl_get_prkey(lun, i)) { ctl_clr_prkey(lun, i); lun->pr_key_count--; ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT); } else if (msg->pr.pr_info.res_type != lun->res_type && (lun->res_type == SPR_TYPE_WR_EX_RO || lun->res_type == SPR_TYPE_EX_AC_RO)) { ctl_est_ua(lun, i, CTL_UA_RES_RELEASE); } } lun->res_type = msg->pr.pr_info.res_type; if (lun->res_type != SPR_TYPE_WR_EX_AR && lun->res_type != SPR_TYPE_EX_AC_AR) lun->pr_res_idx = msg->pr.pr_info.residx; else lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS; } lun->PRGeneration++; } int 
ctl_persistent_reserve_out(struct ctl_scsiio *ctsio) { int retval; u_int32_t param_len; struct scsi_per_res_out *cdb; struct ctl_lun *lun; struct scsi_per_res_out_parms* param; struct ctl_softc *softc; uint32_t residx; uint64_t res_key, sa_res_key, key; uint8_t type; union ctl_ha_msg persis_io; int i; CTL_DEBUG_PRINT(("ctl_persistent_reserve_out\n")); retval = CTL_RETVAL_COMPLETE; cdb = (struct scsi_per_res_out *)ctsio->cdb; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; softc = lun->ctl_softc; /* * We only support whole-LUN scope. The scope & type are ignored for * register, register and ignore existing key and clear. * We sometimes ignore scope and type on preempts too!! * Verify reservation type here as well. */ type = cdb->scope_type & SPR_TYPE_MASK; if ((cdb->action == SPRO_RESERVE) || (cdb->action == SPRO_RELEASE)) { if ((cdb->scope_type & SPR_SCOPE_MASK) != SPR_LU_SCOPE) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 4); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } if (type>8 || type==2 || type==4 || type==0) { ctl_set_invalid_field(/*ctsio*/ ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 1, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } } param_len = scsi_4btoul(cdb->length); if ((ctsio->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0) { ctsio->kern_data_ptr = malloc(param_len, M_CTL, M_WAITOK); ctsio->kern_data_len = param_len; ctsio->kern_total_len = param_len; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } param = (struct scsi_per_res_out_parms *)ctsio->kern_data_ptr; residx = ctl_get_initindex(&ctsio->io_hdr.nexus); res_key = scsi_8btou64(param->res_key.key); sa_res_key = scsi_8btou64(param->serv_act_res_key); /* * Validate the reservation key here except for SPRO_REG_IGNO * This must be done for all other service actions */ if ((cdb->action & SPRO_ACTION_MASK) != SPRO_REG_IGNO) { mtx_lock(&lun->lun_lock); if ((key = ctl_get_prkey(lun, residx)) != 0) { if (res_key != key) { /* * The current key passed in doesn't match * the one the initiator previously * registered. */ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } } else if ((cdb->action & SPRO_ACTION_MASK) != SPRO_REGISTER) { /* * We are not registered */ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } else if (res_key != 0) { /* * We are not registered and trying to register but * the register key isn't zero. */ mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } mtx_unlock(&lun->lun_lock); } switch (cdb->action & SPRO_ACTION_MASK) { case SPRO_REGISTER: case SPRO_REG_IGNO: { #if 0 printf("Registration received\n"); #endif /* * We don't support any of these options, as we report in * the read capabilities request (see * ctl_persistent_reserve_in(), above). 
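 *
 * (The unsupported options live in the flags byte of the parameter
 * list; a decode sketch, illustration only, not part of this change:)
 */
#if 0
/* Hypothetical decode of the PERSISTENT RESERVE OUT flags byte. */
static void
example_decode_pr_out_flags(struct scsi_per_res_out_parms *param)
{
	printf("spec_i_pt %d all_tg_pt %d aptpl %d\n",
	    (param->flags & SPR_SPEC_I_PT) != 0,	/* bit 3 */
	    (param->flags & SPR_ALL_TG_PT) != 0,	/* bit 2 */
	    (param->flags & SPR_APTPL) != 0);		/* bit 0 */
}
#endif
/*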
*/ if ((param->flags & SPR_SPEC_I_PT) || (param->flags & SPR_ALL_TG_PT) || (param->flags & SPR_APTPL)) { int bit_ptr; if (param->flags & SPR_APTPL) bit_ptr = 0; else if (param->flags & SPR_ALL_TG_PT) bit_ptr = 2; else /* SPR_SPEC_I_PT */ bit_ptr = 3; free(ctsio->kern_data_ptr, M_CTL); ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 0, /*field*/ 20, /*bit_valid*/ 1, /*bit*/ bit_ptr); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } mtx_lock(&lun->lun_lock); /* * The initiator wants to clear the * key/unregister. */ if (sa_res_key == 0) { if ((res_key == 0 && (cdb->action & SPRO_ACTION_MASK) == SPRO_REGISTER) || ((cdb->action & SPRO_ACTION_MASK) == SPRO_REG_IGNO && ctl_get_prkey(lun, residx) == 0)) { mtx_unlock(&lun->lun_lock); goto done; } ctl_clr_prkey(lun, residx); lun->pr_key_count--; if (residx == lun->pr_res_idx) { lun->flags &= ~CTL_LUN_PR_RESERVED; lun->pr_res_idx = CTL_PR_NO_RESERVATION; if ((lun->res_type == SPR_TYPE_WR_EX_RO || lun->res_type == SPR_TYPE_EX_AC_RO) && lun->pr_key_count) { /* * If the reservation is a registrants * only type we need to generate a UA * for other registered inits. The * sense code should be RESERVATIONS * RELEASED */ for (i = softc->init_min; i < softc->init_max; i++){ if (ctl_get_prkey(lun, i) == 0) continue; ctl_est_ua(lun, i, CTL_UA_RES_RELEASE); } } lun->res_type = 0; } else if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS) { if (lun->pr_key_count==0) { lun->flags &= ~CTL_LUN_PR_RESERVED; lun->res_type = 0; lun->pr_res_idx = CTL_PR_NO_RESERVATION; } } lun->PRGeneration++; mtx_unlock(&lun->lun_lock); persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_UNREG_KEY; persis_io.pr.pr_info.residx = residx; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io.pr), M_WAITOK); } else /* sa_res_key != 0 */ { /* * If we aren't registered currently then increment * the key count and set the registered flag. 
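 *
 * (The bookkeeping invariant here, stated as a sketch, illustration
 * only, not part of this change: pr_key_count always equals the number
 * of initiators holding a nonzero key.)
 */
#if 0
/* Hypothetical consistency check for the registration bookkeeping. */
static void
example_assert_key_count(struct ctl_lun *lun)
{
	int i, n;

	n = 0;
	for (i = 0; i < CTL_MAX_INITIATORS; i++)
		if (ctl_get_prkey(lun, i) != 0)
			n++;
	KASSERT(n == lun->pr_key_count,
	    ("pr_key_count %d != %d registered keys", lun->pr_key_count, n));
}
#endif
/*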
*/ ctl_alloc_prkey(lun, residx); if (ctl_get_prkey(lun, residx) == 0) lun->pr_key_count++; ctl_set_prkey(lun, residx, sa_res_key); lun->PRGeneration++; mtx_unlock(&lun->lun_lock); persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_REG_KEY; persis_io.pr.pr_info.residx = residx; memcpy(persis_io.pr.pr_info.sa_res_key, param->serv_act_res_key, sizeof(param->serv_act_res_key)); ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io.pr), M_WAITOK); } break; } case SPRO_RESERVE: #if 0 printf("Reserve executed type %d\n", type); #endif mtx_lock(&lun->lun_lock); if (lun->flags & CTL_LUN_PR_RESERVED) { /* * if this isn't the reservation holder and it's * not a "all registrants" type or if the type is * different then we have a conflict */ if ((lun->pr_res_idx != residx && lun->pr_res_idx != CTL_PR_ALL_REGISTRANTS) || lun->res_type != type) { mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_reservation_conflict(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } mtx_unlock(&lun->lun_lock); } else /* create a reservation */ { /* * If it's not an "all registrants" type record * reservation holder */ if (type != SPR_TYPE_WR_EX_AR && type != SPR_TYPE_EX_AC_AR) lun->pr_res_idx = residx; /* Res holder */ else lun->pr_res_idx = CTL_PR_ALL_REGISTRANTS; lun->flags |= CTL_LUN_PR_RESERVED; lun->res_type = type; mtx_unlock(&lun->lun_lock); /* send msg to other side */ persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_RESERVE; persis_io.pr.pr_info.residx = lun->pr_res_idx; persis_io.pr.pr_info.res_type = type; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io.pr), M_WAITOK); } break; case SPRO_RELEASE: mtx_lock(&lun->lun_lock); if ((lun->flags & CTL_LUN_PR_RESERVED) == 0) { /* No reservation exists return good status */ mtx_unlock(&lun->lun_lock); goto done; } /* * Is this nexus a reservation holder? */ if (lun->pr_res_idx != residx && lun->pr_res_idx != CTL_PR_ALL_REGISTRANTS) { /* * not a res holder return good status but * do nothing */ mtx_unlock(&lun->lun_lock); goto done; } if (lun->res_type != type) { mtx_unlock(&lun->lun_lock); free(ctsio->kern_data_ptr, M_CTL); ctl_set_illegal_pr_release(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* okay to release */ lun->flags &= ~CTL_LUN_PR_RESERVED; lun->pr_res_idx = CTL_PR_NO_RESERVATION; lun->res_type = 0; /* * if this isn't an exclusive access * res generate UA for all other * registrants. 
*/ if (type != SPR_TYPE_EX_AC && type != SPR_TYPE_WR_EX) { for (i = softc->init_min; i < softc->init_max; i++) { if (i == residx || ctl_get_prkey(lun, i) == 0) continue; ctl_est_ua(lun, i, CTL_UA_RES_RELEASE); } } mtx_unlock(&lun->lun_lock); /* Send msg to other side */ persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_RELEASE; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io.pr), M_WAITOK); break; case SPRO_CLEAR: /* send msg to other side */ mtx_lock(&lun->lun_lock); lun->flags &= ~CTL_LUN_PR_RESERVED; lun->res_type = 0; lun->pr_key_count = 0; lun->pr_res_idx = CTL_PR_NO_RESERVATION; ctl_clr_prkey(lun, residx); for (i = 0; i < CTL_MAX_INITIATORS; i++) if (ctl_get_prkey(lun, i) != 0) { ctl_clr_prkey(lun, i); ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT); } lun->PRGeneration++; mtx_unlock(&lun->lun_lock); persis_io.hdr.nexus = ctsio->io_hdr.nexus; persis_io.hdr.msg_type = CTL_MSG_PERS_ACTION; persis_io.pr.pr_info.action = CTL_PR_CLEAR; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &persis_io, sizeof(persis_io.pr), M_WAITOK); break; case SPRO_PREEMPT: case SPRO_PRE_ABO: { int nretval; nretval = ctl_pro_preempt(softc, lun, res_key, sa_res_key, type, residx, ctsio, cdb, param); if (nretval != 0) return (CTL_RETVAL_COMPLETE); break; } default: panic("Invalid PR type %x", cdb->action); } done: free(ctsio->kern_data_ptr, M_CTL); ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (retval); } /* * This routine is for handling a message from the other SC pertaining to * persistent reserve out. All the error checking will have been done * so only performing the action need be done here to keep the two * in sync. */ static void ctl_hndl_per_res_out_on_other_sc(union ctl_ha_msg *msg) { struct ctl_lun *lun; struct ctl_softc *softc; int i; uint32_t residx, targ_lun; softc = control_softc; targ_lun = msg->hdr.nexus.targ_mapped_lun; mtx_lock(&softc->ctl_lock); if ((targ_lun >= CTL_MAX_LUNS) || ((lun = softc->ctl_luns[targ_lun]) == NULL)) { mtx_unlock(&softc->ctl_lock); return; } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); if (lun->flags & CTL_LUN_DISABLED) { mtx_unlock(&lun->lun_lock); return; } residx = ctl_get_initindex(&msg->hdr.nexus); switch(msg->pr.pr_info.action) { case CTL_PR_REG_KEY: ctl_alloc_prkey(lun, msg->pr.pr_info.residx); if (ctl_get_prkey(lun, msg->pr.pr_info.residx) == 0) lun->pr_key_count++; ctl_set_prkey(lun, msg->pr.pr_info.residx, scsi_8btou64(msg->pr.pr_info.sa_res_key)); lun->PRGeneration++; break; case CTL_PR_UNREG_KEY: ctl_clr_prkey(lun, msg->pr.pr_info.residx); lun->pr_key_count--; /* XXX Need to see if the reservation has been released */ /* if so do we need to generate UA? */ if (msg->pr.pr_info.residx == lun->pr_res_idx) { lun->flags &= ~CTL_LUN_PR_RESERVED; lun->pr_res_idx = CTL_PR_NO_RESERVATION; if ((lun->res_type == SPR_TYPE_WR_EX_RO || lun->res_type == SPR_TYPE_EX_AC_RO) && lun->pr_key_count) { /* * If the reservation is a registrants * only type we need to generate a UA * for other registered inits.
The * sense code should be RESERVATIONS * RELEASED */ for (i = softc->init_min; i < softc->init_max; i++) { if (ctl_get_prkey(lun, i) == 0) continue; ctl_est_ua(lun, i, CTL_UA_RES_RELEASE); } } lun->res_type = 0; } else if (lun->pr_res_idx == CTL_PR_ALL_REGISTRANTS) { if (lun->pr_key_count == 0) { lun->flags &= ~CTL_LUN_PR_RESERVED; lun->res_type = 0; lun->pr_res_idx = CTL_PR_NO_RESERVATION; } } lun->PRGeneration++; break; case CTL_PR_RESERVE: lun->flags |= CTL_LUN_PR_RESERVED; lun->res_type = msg->pr.pr_info.res_type; lun->pr_res_idx = msg->pr.pr_info.residx; break; case CTL_PR_RELEASE: /* * if this isn't an exclusive access res, generate UA for all * other registrants. */ if (lun->res_type != SPR_TYPE_EX_AC && lun->res_type != SPR_TYPE_WR_EX) { for (i = softc->init_min; i < softc->init_max; i++) { if (i == residx || ctl_get_prkey(lun, i) == 0) continue; ctl_est_ua(lun, i, CTL_UA_RES_RELEASE); } } lun->flags &= ~CTL_LUN_PR_RESERVED; lun->pr_res_idx = CTL_PR_NO_RESERVATION; lun->res_type = 0; break; case CTL_PR_PREEMPT: ctl_pro_preempt_other(lun, msg); break; case CTL_PR_CLEAR: lun->flags &= ~CTL_LUN_PR_RESERVED; lun->res_type = 0; lun->pr_key_count = 0; lun->pr_res_idx = CTL_PR_NO_RESERVATION; for (i = 0; i < CTL_MAX_INITIATORS; i++) { if (ctl_get_prkey(lun, i) == 0) continue; ctl_clr_prkey(lun, i); ctl_est_ua(lun, i, CTL_UA_REG_PREEMPT); } lun->PRGeneration++; break; } mtx_unlock(&lun->lun_lock); } int ctl_read_write(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; uint64_t lba; uint32_t num_blocks; int flags, retval; int isread; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; CTL_DEBUG_PRINT(("ctl_read_write: command: %#x\n", ctsio->cdb[0])); flags = 0; retval = CTL_RETVAL_COMPLETE; isread = ctsio->cdb[0] == READ_6 || ctsio->cdb[0] == READ_10 || ctsio->cdb[0] == READ_12 || ctsio->cdb[0] == READ_16; switch (ctsio->cdb[0]) { case READ_6: case WRITE_6: { struct scsi_rw_6 *cdb; cdb = (struct scsi_rw_6 *)ctsio->cdb; lba = scsi_3btoul(cdb->addr); /* only 5 bits are valid in the most significant address byte */ lba &= 0x1fffff; num_blocks = cdb->length; /* * This is correct according to SBC-2.
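 *
 * Worked example: addr = {0xff, 0xff, 0xff} masks down to LBA
 * 0x1fffff (the top three bits of the first address byte are the
 * historical LUN field and are ignored), and length = 0 decodes to
 * a 256-block transfer per the rule below:
 *
 *	lba = scsi_3btoul(cdb->addr) & 0x1fffff;
 *	num_blocks = (cdb->length == 0) ? 256 : cdb->length;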
*/ if (num_blocks == 0) num_blocks = 256; break; } case READ_10: case WRITE_10: { struct scsi_rw_10 *cdb; cdb = (struct scsi_rw_10 *)ctsio->cdb; if (cdb->byte2 & SRW10_FUA) flags |= CTL_LLF_FUA; if (cdb->byte2 & SRW10_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_2btoul(cdb->length); break; } case WRITE_VERIFY_10: { struct scsi_write_verify_10 *cdb; cdb = (struct scsi_write_verify_10 *)ctsio->cdb; flags |= CTL_LLF_FUA; if (cdb->byte2 & SWV_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_2btoul(cdb->length); break; } case READ_12: case WRITE_12: { struct scsi_rw_12 *cdb; cdb = (struct scsi_rw_12 *)ctsio->cdb; if (cdb->byte2 & SRW12_FUA) flags |= CTL_LLF_FUA; if (cdb->byte2 & SRW12_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } case WRITE_VERIFY_12: { struct scsi_write_verify_12 *cdb; cdb = (struct scsi_write_verify_12 *)ctsio->cdb; flags |= CTL_LLF_FUA; if (cdb->byte2 & SWV_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } case READ_16: case WRITE_16: { struct scsi_rw_16 *cdb; cdb = (struct scsi_rw_16 *)ctsio->cdb; if (cdb->byte2 & SRW12_FUA) flags |= CTL_LLF_FUA; if (cdb->byte2 & SRW12_DPO) flags |= CTL_LLF_DPO; lba = scsi_8btou64(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } case WRITE_ATOMIC_16: { struct scsi_rw_16 *cdb; if (lun->be_lun->atomicblock == 0) { ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } cdb = (struct scsi_rw_16 *)ctsio->cdb; if (cdb->byte2 & SRW12_FUA) flags |= CTL_LLF_FUA; if (cdb->byte2 & SRW12_DPO) flags |= CTL_LLF_DPO; lba = scsi_8btou64(cdb->addr); num_blocks = scsi_4btoul(cdb->length); if (num_blocks > lun->be_lun->atomicblock) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 12, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } break; } case WRITE_VERIFY_16: { struct scsi_write_verify_16 *cdb; cdb = (struct scsi_write_verify_16 *)ctsio->cdb; flags |= CTL_LLF_FUA; if (cdb->byte2 & SWV_DPO) flags |= CTL_LLF_DPO; lba = scsi_8btou64(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } default: /* * We got a command we don't support. This shouldn't * happen, commands should be filtered out above us. */ ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); break; /* NOTREACHED */ } /* * The first check is to make sure we're in bounds, the second * check is to catch wrap-around problems. If the lba + num blocks * is less than the lba, then we've wrapped around and the block * range is invalid anyway. */ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) || ((lba + num_blocks) < lba)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * According to SBC-3, a transfer length of 0 is not an error. * Note that this cannot happen with WRITE(6) or READ(6), since 0 * translates to 256 blocks for those commands. */ if (num_blocks == 0) { ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* Set FUA and/or DPO if caches are disabled. 
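 *
 * In table form (a restatement of the code below, not new policy):
 *
 *	read,  SCP_RCD set   (read cache disabled)  -> force FUA | DPO
 *	write, SCP_WCE clear (write cache disabled) -> force FUA
 *
 * i.e. a LUN with caching disabled always behaves as if the
 * initiator had set the corresponding bits itself.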
*/ if (isread) { if ((lun->mode_pages.caching_page[CTL_PAGE_CURRENT].flags1 & SCP_RCD) != 0) flags |= CTL_LLF_FUA | CTL_LLF_DPO; } else { if ((lun->mode_pages.caching_page[CTL_PAGE_CURRENT].flags1 & SCP_WCE) == 0) flags |= CTL_LLF_FUA; } lbalen = (struct ctl_lba_len_flags *) &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = lba; lbalen->len = num_blocks; lbalen->flags = (isread ? CTL_LLF_READ : CTL_LLF_WRITE) | flags; ctsio->kern_total_len = num_blocks * lun->be_lun->blocksize; ctsio->kern_rel_offset = 0; CTL_DEBUG_PRINT(("ctl_read_write: calling data_submit()\n")); retval = lun->backend->data_submit((union ctl_io *)ctsio); return (retval); } static int ctl_cnw_cont(union ctl_io *io) { struct ctl_scsiio *ctsio; struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; int retval; ctsio = &io->scsiio; ctsio->io_hdr.status = CTL_STATUS_NONE; ctsio->io_hdr.flags &= ~CTL_FLAG_IO_CONT; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; lbalen = (struct ctl_lba_len_flags *) &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->flags &= ~CTL_LLF_COMPARE; lbalen->flags |= CTL_LLF_WRITE; CTL_DEBUG_PRINT(("ctl_cnw_cont: calling data_submit()\n")); retval = lun->backend->data_submit((union ctl_io *)ctsio); return (retval); } int ctl_cnw(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; uint64_t lba; uint32_t num_blocks; int flags, retval; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; CTL_DEBUG_PRINT(("ctl_cnw: command: %#x\n", ctsio->cdb[0])); flags = 0; retval = CTL_RETVAL_COMPLETE; switch (ctsio->cdb[0]) { case COMPARE_AND_WRITE: { struct scsi_compare_and_write *cdb; cdb = (struct scsi_compare_and_write *)ctsio->cdb; if (cdb->byte2 & SRW10_FUA) flags |= CTL_LLF_FUA; if (cdb->byte2 & SRW10_DPO) flags |= CTL_LLF_DPO; lba = scsi_8btou64(cdb->addr); num_blocks = cdb->length; break; } default: /* * We got a command we don't support. This shouldn't * happen, commands should be filtered out above us. */ ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); break; /* NOTREACHED */ } /* * The first check is to make sure we're in bounds, the second * check is to catch wrap-around problems. If the lba + num blocks * is less than the lba, then we've wrapped around and the block * range is invalid anyway. */ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) || ((lba + num_blocks) < lba)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * According to SBC-3, a transfer length of 0 is not an error. */ if (num_blocks == 0) { ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* Set FUA if write cache is disabled. */ if ((lun->mode_pages.caching_page[CTL_PAGE_CURRENT].flags1 & SCP_WCE) == 0) flags |= CTL_LLF_FUA; ctsio->kern_total_len = 2 * num_blocks * lun->be_lun->blocksize; ctsio->kern_rel_offset = 0; /* * Set the IO_CONT flag, so that if this I/O gets passed to * ctl_data_submit_done(), it'll get passed back to * ctl_cnw_cont() for further processing.
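 *
 * The resulting two-phase flow, in outline: phase one submits the
 * request with lbalen->flags = CTL_LLF_COMPARE, and the backend
 * compares the first num_blocks worth of the buffer against the
 * medium; when that completes, ctl_cnw_cont() above clears
 * CTL_LLF_COMPARE, sets CTL_LLF_WRITE and resubmits, so the write
 * half only runs if the compare phase finished successfully.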
*/ ctsio->io_hdr.flags |= CTL_FLAG_IO_CONT; ctsio->io_cont = ctl_cnw_cont; lbalen = (struct ctl_lba_len_flags *) &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = lba; lbalen->len = num_blocks; lbalen->flags = CTL_LLF_COMPARE | flags; CTL_DEBUG_PRINT(("ctl_cnw: calling data_submit()\n")); retval = lun->backend->data_submit((union ctl_io *)ctsio); return (retval); } int ctl_verify(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct ctl_lba_len_flags *lbalen; uint64_t lba; uint32_t num_blocks; int bytchk, flags; int retval; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; CTL_DEBUG_PRINT(("ctl_verify: command: %#x\n", ctsio->cdb[0])); bytchk = 0; flags = CTL_LLF_FUA; retval = CTL_RETVAL_COMPLETE; switch (ctsio->cdb[0]) { case VERIFY_10: { struct scsi_verify_10 *cdb; cdb = (struct scsi_verify_10 *)ctsio->cdb; if (cdb->byte2 & SVFY_BYTCHK) bytchk = 1; if (cdb->byte2 & SVFY_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_2btoul(cdb->length); break; } case VERIFY_12: { struct scsi_verify_12 *cdb; cdb = (struct scsi_verify_12 *)ctsio->cdb; if (cdb->byte2 & SVFY_BYTCHK) bytchk = 1; if (cdb->byte2 & SVFY_DPO) flags |= CTL_LLF_DPO; lba = scsi_4btoul(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } case VERIFY_16: { struct scsi_rw_16 *cdb; cdb = (struct scsi_rw_16 *)ctsio->cdb; if (cdb->byte2 & SVFY_BYTCHK) bytchk = 1; if (cdb->byte2 & SVFY_DPO) flags |= CTL_LLF_DPO; lba = scsi_8btou64(cdb->addr); num_blocks = scsi_4btoul(cdb->length); break; } default: /* * We got a command we don't support. This shouldn't * happen, commands should be filtered out above us. */ ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * The first check is to make sure we're in bounds, the second * check is to catch wrap-around problems. If the lba + num blocks * is less than the lba, then we've wrapped around and the block * range is invalid anyway. */ if (((lba + num_blocks) > (lun->be_lun->maxlba + 1)) || ((lba + num_blocks) < lba)) { ctl_set_lba_out_of_range(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * According to SBC-3, a transfer length of 0 is not an error. 
*/ if (num_blocks == 0) { ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } lbalen = (struct ctl_lba_len_flags *) &ctsio->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; lbalen->lba = lba; lbalen->len = num_blocks; if (bytchk) { lbalen->flags = CTL_LLF_COMPARE | flags; ctsio->kern_total_len = num_blocks * lun->be_lun->blocksize; } else { lbalen->flags = CTL_LLF_VERIFY | flags; ctsio->kern_total_len = 0; } ctsio->kern_rel_offset = 0; CTL_DEBUG_PRINT(("ctl_verify: calling data_submit()\n")); retval = lun->backend->data_submit((union ctl_io *)ctsio); return (retval); } int ctl_report_luns(struct ctl_scsiio *ctsio) { struct ctl_softc *softc = control_softc; struct scsi_report_luns *cdb; struct scsi_report_luns_data *lun_data; struct ctl_lun *lun, *request_lun; struct ctl_port *port; int num_luns, retval; uint32_t alloc_len, lun_datalen; int num_filled, well_known; uint32_t initidx, targ_lun_id, lun_id; retval = CTL_RETVAL_COMPLETE; well_known = 0; cdb = (struct scsi_report_luns *)ctsio->cdb; port = ctl_io_port(&ctsio->io_hdr); CTL_DEBUG_PRINT(("ctl_report_luns\n")); mtx_lock(&softc->ctl_lock); num_luns = 0; for (targ_lun_id = 0; targ_lun_id < CTL_MAX_LUNS; targ_lun_id++) { if (ctl_lun_map_from_port(port, targ_lun_id) < CTL_MAX_LUNS) num_luns++; } mtx_unlock(&softc->ctl_lock); switch (cdb->select_report) { case RPL_REPORT_DEFAULT: case RPL_REPORT_ALL: break; case RPL_REPORT_WELLKNOWN: well_known = 1; num_luns = 0; break; default: ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (retval); break; /* NOTREACHED */ } alloc_len = scsi_4btoul(cdb->length); /* * The initiator has to allocate at least 16 bytes for this request, * so he can at least get the header and the first LUN. Otherwise * we reject the request (per SPC-3 rev 14, section 6.21). */ if (alloc_len < (sizeof(struct scsi_report_luns_data) + sizeof(struct scsi_report_luns_lundata))) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 6, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (retval); } request_lun = (struct ctl_lun *) ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; lun_datalen = sizeof(*lun_data) + (num_luns * sizeof(struct scsi_report_luns_lundata)); ctsio->kern_data_ptr = malloc(lun_datalen, M_CTL, M_WAITOK | M_ZERO); lun_data = (struct scsi_report_luns_data *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; initidx = ctl_get_initindex(&ctsio->io_hdr.nexus); mtx_lock(&softc->ctl_lock); for (targ_lun_id = 0, num_filled = 0; targ_lun_id < CTL_MAX_LUNS && num_filled < num_luns; targ_lun_id++) { lun_id = ctl_lun_map_from_port(port, targ_lun_id); if (lun_id >= CTL_MAX_LUNS) continue; lun = softc->ctl_luns[lun_id]; if (lun == NULL) continue; if (targ_lun_id <= 0xff) { /* * Peripheral addressing method, bus number 0. */ lun_data->luns[num_filled].lundata[0] = RPL_LUNDATA_ATYP_PERIPH; lun_data->luns[num_filled].lundata[1] = targ_lun_id; num_filled++; } else if (targ_lun_id <= 0x3fff) { /* * Flat addressing method. */ lun_data->luns[num_filled].lundata[0] = RPL_LUNDATA_ATYP_FLAT | (targ_lun_id >> 8); lun_data->luns[num_filled].lundata[1] = (targ_lun_id & 0xff); num_filled++; } else if (targ_lun_id <= 0xffffff) { /* * Extended flat addressing method. 
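 *
 * Concrete encodings for the three branches (leading bytes of the
 * 8-byte LUN field, remaining bytes zero):
 *
 *	LUN 5        peripheral:     00 05
 *	LUN 0x1234   flat:           52 34         (0x40 | 0x12, 0x34)
 *	LUN 0x123456 extended flat:  d2 12 34 56   (0xc0 | 0x12, 3 bytes)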
*/ lun_data->luns[num_filled].lundata[0] = RPL_LUNDATA_ATYP_EXTLUN | 0x12; scsi_ulto3b(targ_lun_id, &lun_data->luns[num_filled].lundata[1]); num_filled++; } else { printf("ctl_report_luns: bogus LUN number %jd, " "skipping\n", (intmax_t)targ_lun_id); } /* * According to SPC-3, rev 14 section 6.21: * * "The execution of a REPORT LUNS command to any valid and * installed logical unit shall clear the REPORTED LUNS DATA * HAS CHANGED unit attention condition for all logical * units of that target with respect to the requesting * initiator. A valid and installed logical unit is one * having a PERIPHERAL QUALIFIER of 000b in the standard * INQUIRY data (see 6.4.2)." * * If request_lun is NULL, the LUN this report luns command * was issued to is either disabled or doesn't exist. In that * case, we shouldn't clear any pending lun change unit * attention. */ if (request_lun != NULL) { mtx_lock(&lun->lun_lock); ctl_clr_ua(lun, initidx, CTL_UA_LUN_CHANGE); mtx_unlock(&lun->lun_lock); } } mtx_unlock(&softc->ctl_lock); /* * It's quite possible that we've returned fewer LUNs than we allocated * space for. Trim it. */ lun_datalen = sizeof(*lun_data) + (num_filled * sizeof(struct scsi_report_luns_lundata)); if (lun_datalen < alloc_len) { ctsio->residual = alloc_len - lun_datalen; ctsio->kern_data_len = lun_datalen; ctsio->kern_total_len = lun_datalen; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * We set this to the actual data length, regardless of how much * space we actually have to return results. If the user looks at * this value, he'll know whether or not he allocated enough space * and reissue the command if necessary. We don't support well * known logical units, so if the user asks for that, return none. */ scsi_ulto4b(lun_datalen - 8, lun_data->length); /* * We can only return SCSI_STATUS_CHECK_COND when we can't satisfy * this request. */ ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (retval); } int ctl_request_sense(struct ctl_scsiio *ctsio) { struct scsi_request_sense *cdb; struct scsi_sense_data *sense_ptr; struct ctl_softc *ctl_softc; struct ctl_lun *lun; uint32_t initidx; int have_error; scsi_sense_data_type sense_format; ctl_ua_type ua_type; cdb = (struct scsi_request_sense *)ctsio->cdb; ctl_softc = control_softc; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; CTL_DEBUG_PRINT(("ctl_request_sense\n")); /* * Determine which sense format the user wants. */ if (cdb->byte2 & SRS_DESC) sense_format = SSD_TYPE_DESC; else sense_format = SSD_TYPE_FIXED; ctsio->kern_data_ptr = malloc(sizeof(*sense_ptr), M_CTL, M_WAITOK); sense_ptr = (struct scsi_sense_data *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; /* * struct scsi_sense_data, which is currently set to 256 bytes, is * larger than the largest allowed value for the length field in the * REQUEST SENSE CDB, which is 252 bytes as of SPC-4. */ ctsio->residual = 0; ctsio->kern_data_len = cdb->length; ctsio->kern_total_len = cdb->length; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * If we don't have a LUN, we don't have any pending sense. */ if (lun == NULL) goto no_sense; have_error = 0; initidx = ctl_get_initindex(&ctsio->io_hdr.nexus); /* * Check for pending sense, and then for pending unit attentions. 
* Pending sense gets returned first, then pending unit attentions. */ mtx_lock(&lun->lun_lock); #ifdef CTL_WITH_CA if (ctl_is_set(lun->have_ca, initidx)) { scsi_sense_data_type stored_format; /* * Check to see which sense format was used for the stored * sense data. */ stored_format = scsi_sense_type(&lun->pending_sense[initidx]); /* * If the user requested a different sense format than the * one we stored, then we need to convert it to the other * format. If we're going from descriptor to fixed format * sense data, we may lose things in translation, depending * on what options were used. * * If the stored format is SSD_TYPE_NONE (i.e. invalid), * for some reason we'll just copy it out as-is. */ if ((stored_format == SSD_TYPE_FIXED) && (sense_format == SSD_TYPE_DESC)) ctl_sense_to_desc((struct scsi_sense_data_fixed *) &lun->pending_sense[initidx], (struct scsi_sense_data_desc *)sense_ptr); else if ((stored_format == SSD_TYPE_DESC) && (sense_format == SSD_TYPE_FIXED)) ctl_sense_to_fixed((struct scsi_sense_data_desc *) &lun->pending_sense[initidx], (struct scsi_sense_data_fixed *)sense_ptr); else memcpy(sense_ptr, &lun->pending_sense[initidx], MIN(sizeof(*sense_ptr), sizeof(lun->pending_sense[initidx]))); ctl_clear_mask(lun->have_ca, initidx); have_error = 1; } else #endif { ua_type = ctl_build_ua(lun, initidx, sense_ptr, sense_format); if (ua_type != CTL_UA_NONE) have_error = 1; if (ua_type == CTL_UA_LUN_CHANGE) { mtx_unlock(&lun->lun_lock); mtx_lock(&ctl_softc->ctl_lock); ctl_clr_ua_allluns(ctl_softc, initidx, ua_type); mtx_unlock(&ctl_softc->ctl_lock); mtx_lock(&lun->lun_lock); } } mtx_unlock(&lun->lun_lock); /* * We already have a pending error, return it. */ if (have_error != 0) { /* * We report the SCSI status as OK, since the status of the * request sense command itself is OK. * We report 0 for the sense length, because we aren't doing * autosense in this case. We're reporting sense as * parameter data. */ ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } no_sense: /* * No sense information to report, so we report that everything is * okay. */ ctl_set_sense_data(sense_ptr, lun, sense_format, /*current_error*/ 1, /*sense_key*/ SSD_KEY_NO_SENSE, /*asc*/ 0x00, /*ascq*/ 0x00, SSD_ELEM_NONE); /* * We report 0 for the sense length, because we aren't doing * autosense in this case. We're reporting sense as parameter data. */ ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_tur(struct ctl_scsiio *ctsio) { CTL_DEBUG_PRINT(("ctl_tur\n")); ctl_set_success(ctsio); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * SCSI VPD page 0x00, the Supported VPD Pages page. 
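 *
 * For orientation, the page 00h payload built below is laid out as
 * (offsets per SPC): byte 0 peripheral qualifier/device type, byte 1
 * page code (0x00), byte 2 reserved, byte 3 page length (the count
 * of page codes that follow), bytes 4..n the supported page codes in
 * ascending order.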
*/ static int ctl_inquiry_evpd_supported(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_supported_pages *pages; int sup_page_size; struct ctl_lun *lun; int p; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; sup_page_size = sizeof(struct scsi_vpd_supported_pages) * SCSI_EVPD_NUM_SUPPORTED_PAGES; ctsio->kern_data_ptr = malloc(sup_page_size, M_CTL, M_WAITOK | M_ZERO); pages = (struct scsi_vpd_supported_pages *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (sup_page_size < alloc_len) { ctsio->residual = alloc_len - sup_page_size; ctsio->kern_data_len = sup_page_size; ctsio->kern_total_len = sup_page_size; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. */ if (lun != NULL) pages->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else pages->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; p = 0; /* Supported VPD pages */ pages->page_list[p++] = SVPD_SUPPORTED_PAGES; /* Serial Number */ pages->page_list[p++] = SVPD_UNIT_SERIAL_NUMBER; /* Device Identification */ pages->page_list[p++] = SVPD_DEVICE_ID; /* Extended INQUIRY Data */ pages->page_list[p++] = SVPD_EXTENDED_INQUIRY_DATA; /* Mode Page Policy */ pages->page_list[p++] = SVPD_MODE_PAGE_POLICY; /* SCSI Ports */ pages->page_list[p++] = SVPD_SCSI_PORTS; /* Third-party Copy */ pages->page_list[p++] = SVPD_SCSI_TPC; if (lun != NULL && lun->be_lun->lun_type == T_DIRECT) { /* Block limits */ pages->page_list[p++] = SVPD_BLOCK_LIMITS; /* Block Device Characteristics */ pages->page_list[p++] = SVPD_BDC; /* Logical Block Provisioning */ pages->page_list[p++] = SVPD_LBP; } pages->length = p; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * SCSI VPD page 0x80, the Unit Serial Number page. */ static int ctl_inquiry_evpd_serial(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_unit_serial_number *sn_ptr; struct ctl_lun *lun; int data_len; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data_len = 4 + CTL_SN_LEN; ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); sn_ptr = (struct scsi_vpd_unit_serial_number *)ctsio->kern_data_ptr; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. */ if (lun != NULL) sn_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else sn_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; sn_ptr->page_code = SVPD_UNIT_SERIAL_NUMBER; sn_ptr->length = CTL_SN_LEN; /* * If we don't have a LUN, we just leave the serial number as * all spaces. 
*/ if (lun != NULL) { strncpy((char *)sn_ptr->serial_num, (char *)lun->be_lun->serial_num, CTL_SN_LEN); } else memset(sn_ptr->serial_num, 0x20, CTL_SN_LEN); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * SCSI VPD page 0x86, the Extended INQUIRY Data page. */ static int ctl_inquiry_evpd_eid(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_extended_inquiry_data *eid_ptr; struct ctl_lun *lun; int data_len; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data_len = sizeof(struct scsi_vpd_extended_inquiry_data); ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); eid_ptr = (struct scsi_vpd_extended_inquiry_data *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. */ if (lun != NULL) eid_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else eid_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; eid_ptr->page_code = SVPD_EXTENDED_INQUIRY_DATA; scsi_ulto2b(data_len - 4, eid_ptr->page_length); /* * We support head of queue, ordered and simple tags. */ eid_ptr->flags2 = SVPD_EID_HEADSUP | SVPD_EID_ORDSUP | SVPD_EID_SIMPSUP; /* * Volatile cache supported. */ eid_ptr->flags3 = SVPD_EID_V_SUP; /* * This means that we clear the REPORTED LUNS DATA HAS CHANGED unit * attention for a particular IT nexus on all LUNs once we report * it to that nexus once. This bit is required as of SPC-4. */ eid_ptr->flags4 = SVPD_EID_LUICLT; /* * XXX KDM in order to correctly answer this, we would need * information from the SIM to determine how much sense data it * can send. So this would really be a path inquiry field, most * likely. This can be set to a maximum of 252 according to SPC-4, * but the hardware may or may not be able to support that much. * 0 just means that the maximum sense data length is not reported. */ eid_ptr->max_sense_length = 0; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } static int ctl_inquiry_evpd_mpp(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_mode_page_policy *mpp_ptr; struct ctl_lun *lun; int data_len; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data_len = sizeof(struct scsi_vpd_mode_page_policy) + sizeof(struct scsi_vpd_mode_page_policy_descr); ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); mpp_ptr = (struct scsi_vpd_mode_page_policy *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. 
*/ if (lun != NULL) mpp_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else mpp_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; mpp_ptr->page_code = SVPD_MODE_PAGE_POLICY; scsi_ulto2b(data_len - 4, mpp_ptr->page_length); mpp_ptr->descr[0].page_code = 0x3f; mpp_ptr->descr[0].subpage_code = 0xff; mpp_ptr->descr[0].policy = SVPD_MPP_SHARED; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * SCSI VPD page 0x83, the Device Identification page. */ static int ctl_inquiry_evpd_devid(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_device_id *devid_ptr; struct scsi_vpd_id_descriptor *desc; struct ctl_softc *softc; struct ctl_lun *lun; struct ctl_port *port; int data_len; uint8_t proto; softc = control_softc; port = ctl_io_port(&ctsio->io_hdr); lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; data_len = sizeof(struct scsi_vpd_device_id) + sizeof(struct scsi_vpd_id_descriptor) + sizeof(struct scsi_vpd_id_rel_trgt_port_id) + sizeof(struct scsi_vpd_id_descriptor) + sizeof(struct scsi_vpd_id_trgt_port_grp_id); if (lun && lun->lun_devid) data_len += lun->lun_devid->len; if (port && port->port_devid) data_len += port->port_devid->len; if (port && port->target_devid) data_len += port->target_devid->len; ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); devid_ptr = (struct scsi_vpd_device_id *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. */ if (lun != NULL) devid_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else devid_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; devid_ptr->page_code = SVPD_DEVICE_ID; scsi_ulto2b(data_len - 4, devid_ptr->length); if (port && port->port_type == CTL_PORT_FC) proto = SCSI_PROTO_FC << 4; else if (port && port->port_type == CTL_PORT_ISCSI) proto = SCSI_PROTO_ISCSI << 4; else proto = SCSI_PROTO_SPI << 4; desc = (struct scsi_vpd_id_descriptor *)devid_ptr->desc_list; /* * We're using a LUN association here. i.e., this device ID is a * per-LUN identifier. */ if (lun && lun->lun_devid) { memcpy(desc, lun->lun_devid->data, lun->lun_devid->len); desc = (struct scsi_vpd_id_descriptor *)((uint8_t *)desc + lun->lun_devid->len); } /* * This is for the WWPN which is a port association. 
*/ if (port && port->port_devid) { memcpy(desc, port->port_devid->data, port->port_devid->len); desc = (struct scsi_vpd_id_descriptor *)((uint8_t *)desc + port->port_devid->len); } /* * This is for the Relative Target Port(type 4h) identifier */ desc->proto_codeset = proto | SVPD_ID_CODESET_BINARY; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT | SVPD_ID_TYPE_RELTARG; desc->length = 4; scsi_ulto2b(ctsio->io_hdr.nexus.targ_port, &desc->identifier[2]); desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] + sizeof(struct scsi_vpd_id_rel_trgt_port_id)); /* * This is for the Target Port Group(type 5h) identifier */ desc->proto_codeset = proto | SVPD_ID_CODESET_BINARY; desc->id_type = SVPD_ID_PIV | SVPD_ID_ASSOC_PORT | SVPD_ID_TYPE_TPORTGRP; desc->length = 4; scsi_ulto2b(ctsio->io_hdr.nexus.targ_port / softc->port_cnt + 1, &desc->identifier[2]); desc = (struct scsi_vpd_id_descriptor *)(&desc->identifier[0] + sizeof(struct scsi_vpd_id_trgt_port_grp_id)); /* * This is for the Target identifier */ if (port && port->target_devid) { memcpy(desc, port->target_devid->data, port->target_devid->len); } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } static int ctl_inquiry_evpd_scsi_ports(struct ctl_scsiio *ctsio, int alloc_len) { struct ctl_softc *softc = control_softc; struct scsi_vpd_scsi_ports *sp; struct scsi_vpd_port_designation *pd; struct scsi_vpd_port_designation_cont *pdc; struct ctl_lun *lun; struct ctl_port *port; int data_len, num_target_ports, iid_len, id_len; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; num_target_ports = 0; iid_len = 0; id_len = 0; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { if ((port->status & CTL_PORT_STATUS_ONLINE) == 0) continue; if (lun != NULL && ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS) continue; num_target_ports++; if (port->init_devid) iid_len += port->init_devid->len; if (port->port_devid) id_len += port->port_devid->len; } mtx_unlock(&softc->ctl_lock); data_len = sizeof(struct scsi_vpd_scsi_ports) + num_target_ports * (sizeof(struct scsi_vpd_port_designation) + sizeof(struct scsi_vpd_port_designation_cont)) + iid_len + id_len; ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); sp = (struct scsi_vpd_scsi_ports *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. 
*/ if (lun != NULL) sp->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else sp->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; sp->page_code = SVPD_SCSI_PORTS; scsi_ulto2b(data_len - sizeof(struct scsi_vpd_scsi_ports), sp->page_length); pd = &sp->design[0]; mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(port, &softc->port_list, links) { if ((port->status & CTL_PORT_STATUS_ONLINE) == 0) continue; if (lun != NULL && ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS) continue; scsi_ulto2b(port->targ_port, pd->relative_port_id); if (port->init_devid) { iid_len = port->init_devid->len; memcpy(pd->initiator_transportid, port->init_devid->data, port->init_devid->len); } else iid_len = 0; scsi_ulto2b(iid_len, pd->initiator_transportid_length); pdc = (struct scsi_vpd_port_designation_cont *) (&pd->initiator_transportid[iid_len]); if (port->port_devid) { id_len = port->port_devid->len; memcpy(pdc->target_port_descriptors, port->port_devid->data, port->port_devid->len); } else id_len = 0; scsi_ulto2b(id_len, pdc->target_port_descriptors_length); pd = (struct scsi_vpd_port_designation *) ((uint8_t *)pdc->target_port_descriptors + id_len); } mtx_unlock(&softc->ctl_lock); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } static int ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_block_limits *bl_ptr; struct ctl_lun *lun; int bs; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; ctsio->kern_data_ptr = malloc(sizeof(*bl_ptr), M_CTL, M_WAITOK | M_ZERO); bl_ptr = (struct scsi_vpd_block_limits *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (sizeof(*bl_ptr) < alloc_len) { ctsio->residual = alloc_len - sizeof(*bl_ptr); ctsio->kern_data_len = sizeof(*bl_ptr); ctsio->kern_total_len = sizeof(*bl_ptr); } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. 
*/ if (lun != NULL) bl_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else bl_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; bl_ptr->page_code = SVPD_BLOCK_LIMITS; scsi_ulto2b(sizeof(*bl_ptr) - 4, bl_ptr->page_length); bl_ptr->max_cmp_write_len = 0xff; scsi_ulto4b(0xffffffff, bl_ptr->max_txfer_len); if (lun != NULL) { bs = lun->be_lun->blocksize; scsi_ulto4b(lun->be_lun->opttxferlen, bl_ptr->opt_txfer_len); if (lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) { scsi_ulto4b(0xffffffff, bl_ptr->max_unmap_lba_cnt); scsi_ulto4b(0xffffffff, bl_ptr->max_unmap_blk_cnt); if (lun->be_lun->ublockexp != 0) { scsi_ulto4b((1 << lun->be_lun->ublockexp), bl_ptr->opt_unmap_grain); scsi_ulto4b(0x80000000 | lun->be_lun->ublockoff, bl_ptr->unmap_grain_align); } } scsi_ulto4b(lun->be_lun->atomicblock, bl_ptr->max_atomic_transfer_length); scsi_ulto4b(0, bl_ptr->atomic_alignment); scsi_ulto4b(0, bl_ptr->atomic_transfer_length_granularity); } scsi_u64to8b(UINT64_MAX, bl_ptr->max_write_same_length); ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } static int ctl_inquiry_evpd_bdc(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_block_device_characteristics *bdc_ptr; struct ctl_lun *lun; const char *value; u_int i; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; ctsio->kern_data_ptr = malloc(sizeof(*bdc_ptr), M_CTL, M_WAITOK | M_ZERO); bdc_ptr = (struct scsi_vpd_block_device_characteristics *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (sizeof(*bdc_ptr) < alloc_len) { ctsio->residual = alloc_len - sizeof(*bdc_ptr); ctsio->kern_data_len = sizeof(*bdc_ptr); ctsio->kern_total_len = sizeof(*bdc_ptr); } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. 
*/ if (lun != NULL) bdc_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else bdc_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; bdc_ptr->page_code = SVPD_BDC; scsi_ulto2b(sizeof(*bdc_ptr) - 4, bdc_ptr->page_length); if (lun != NULL && (value = ctl_get_opt(&lun->be_lun->options, "rpm")) != NULL) i = strtol(value, NULL, 0); else i = CTL_DEFAULT_ROTATION_RATE; scsi_ulto2b(i, bdc_ptr->medium_rotation_rate); if (lun != NULL && (value = ctl_get_opt(&lun->be_lun->options, "formfactor")) != NULL) i = strtol(value, NULL, 0); else i = 0; bdc_ptr->wab_wac_ff = (i & 0x0f); bdc_ptr->flags = SVPD_FUAB | SVPD_VBULS; ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } static int ctl_inquiry_evpd_lbp(struct ctl_scsiio *ctsio, int alloc_len) { struct scsi_vpd_logical_block_prov *lbp_ptr; struct ctl_lun *lun; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; ctsio->kern_data_ptr = malloc(sizeof(*lbp_ptr), M_CTL, M_WAITOK | M_ZERO); lbp_ptr = (struct scsi_vpd_logical_block_prov *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; if (sizeof(*lbp_ptr) < alloc_len) { ctsio->residual = alloc_len - sizeof(*lbp_ptr); ctsio->kern_data_len = sizeof(*lbp_ptr); ctsio->kern_total_len = sizeof(*lbp_ptr); } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; ctsio->kern_sg_entries = 0; /* * The control device is always connected. The disk device, on the * other hand, may not be online all the time. Need to change this * to figure out whether the disk device is actually online or not. */ if (lun != NULL) lbp_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; else lbp_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | T_DIRECT; lbp_ptr->page_code = SVPD_LBP; scsi_ulto2b(sizeof(*lbp_ptr) - 4, lbp_ptr->page_length); lbp_ptr->threshold_exponent = CTL_LBP_EXPONENT; if (lun != NULL && lun->be_lun->flags & CTL_LUN_FLAG_UNMAP) { lbp_ptr->flags = SVPD_LBP_UNMAP | SVPD_LBP_WS16 | SVPD_LBP_WS10 | SVPD_LBP_RZ | SVPD_LBP_ANC_SUP; lbp_ptr->prov_type = SVPD_LBP_THIN; } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } /* * INQUIRY with the EVPD bit set. 
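 *
 * The CDB fields driving the dispatch below: byte 1 bit 0 (SI_EVPD)
 * selects VPD data, byte 2 is the page code switched on, and bytes
 * 3-4 are the allocation length. For example, the CDB
 * 12 01 83 00 ff 00 asks for up to 255 bytes of the Device
 * Identification page (0x83).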
*/ static int ctl_inquiry_evpd(struct ctl_scsiio *ctsio) { struct ctl_lun *lun; struct scsi_inquiry *cdb; int alloc_len, retval; lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_inquiry *)ctsio->cdb; alloc_len = scsi_2btoul(cdb->length); switch (cdb->page_code) { case SVPD_SUPPORTED_PAGES: retval = ctl_inquiry_evpd_supported(ctsio, alloc_len); break; case SVPD_UNIT_SERIAL_NUMBER: retval = ctl_inquiry_evpd_serial(ctsio, alloc_len); break; case SVPD_DEVICE_ID: retval = ctl_inquiry_evpd_devid(ctsio, alloc_len); break; case SVPD_EXTENDED_INQUIRY_DATA: retval = ctl_inquiry_evpd_eid(ctsio, alloc_len); break; case SVPD_MODE_PAGE_POLICY: retval = ctl_inquiry_evpd_mpp(ctsio, alloc_len); break; case SVPD_SCSI_PORTS: retval = ctl_inquiry_evpd_scsi_ports(ctsio, alloc_len); break; case SVPD_SCSI_TPC: retval = ctl_inquiry_evpd_tpc(ctsio, alloc_len); break; case SVPD_BLOCK_LIMITS: if (lun == NULL || lun->be_lun->lun_type != T_DIRECT) goto err; retval = ctl_inquiry_evpd_block_limits(ctsio, alloc_len); break; case SVPD_BDC: if (lun == NULL || lun->be_lun->lun_type != T_DIRECT) goto err; retval = ctl_inquiry_evpd_bdc(ctsio, alloc_len); break; case SVPD_LBP: if (lun == NULL || lun->be_lun->lun_type != T_DIRECT) goto err; retval = ctl_inquiry_evpd_lbp(ctsio, alloc_len); break; default: err: ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); retval = CTL_RETVAL_COMPLETE; break; } return (retval); } /* * Standard INQUIRY data. */ static int ctl_inquiry_std(struct ctl_scsiio *ctsio) { struct scsi_inquiry_data *inq_ptr; struct scsi_inquiry *cdb; struct ctl_softc *softc; struct ctl_port *port; struct ctl_lun *lun; char *val; uint32_t alloc_len, data_len; ctl_port_type port_type; softc = control_softc; /* * Figure out whether we're talking to a Fibre Channel port or not. * We treat the ioctl front end, and any SCSI adapters, as packetized * SCSI front ends. */ port = ctl_io_port(&ctsio->io_hdr); if (port != NULL) port_type = port->port_type; else port_type = CTL_PORT_SCSI; if (port_type == CTL_PORT_IOCTL || port_type == CTL_PORT_INTERNAL) port_type = CTL_PORT_SCSI; lun = ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; cdb = (struct scsi_inquiry *)ctsio->cdb; alloc_len = scsi_2btoul(cdb->length); /* * We malloc the full inquiry data size here and fill it * in. If the user only asks for less, we'll give him * that much. 
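 *
 * The truncation pattern, shared by every INQUIRY/VPD handler in
 * this file:
 *
 *	if (data_len < alloc_len) {
 *		ctsio->residual = alloc_len - data_len;
 *		ctsio->kern_data_len = data_len;
 *	} else {
 *		ctsio->residual = 0;
 *		ctsio->kern_data_len = alloc_len;
 *	}
 *
 * so we never move more than min(data_len, alloc_len) bytes, and any
 * shortfall is reported as residual.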
*/ data_len = offsetof(struct scsi_inquiry_data, vendor_specific1); ctsio->kern_data_ptr = malloc(data_len, M_CTL, M_WAITOK | M_ZERO); inq_ptr = (struct scsi_inquiry_data *)ctsio->kern_data_ptr; ctsio->kern_sg_entries = 0; ctsio->kern_data_resid = 0; ctsio->kern_rel_offset = 0; if (data_len < alloc_len) { ctsio->residual = alloc_len - data_len; ctsio->kern_data_len = data_len; ctsio->kern_total_len = data_len; } else { ctsio->residual = 0; ctsio->kern_data_len = alloc_len; ctsio->kern_total_len = alloc_len; } if (lun != NULL) { if ((lun->flags & CTL_LUN_PRIMARY_SC) || softc->ha_link >= CTL_HA_LINK_UNKNOWN) { inq_ptr->device = (SID_QUAL_LU_CONNECTED << 5) | lun->be_lun->lun_type; } else { inq_ptr->device = (SID_QUAL_LU_OFFLINE << 5) | lun->be_lun->lun_type; } } else inq_ptr->device = (SID_QUAL_BAD_LU << 5) | T_NODEVICE; /* RMB in byte 2 is 0 */ inq_ptr->version = SCSI_REV_SPC4; /* * According to SAM-3, even if a device only supports a single * level of LUN addressing, it should still set the HISUP bit: * * 4.9.1 Logical unit numbers overview * * All logical unit number formats described in this standard are * hierarchical in structure even when only a single level in that * hierarchy is used. The HISUP bit shall be set to one in the * standard INQUIRY data (see SPC-2) when any logical unit number * format described in this standard is used. Non-hierarchical * formats are outside the scope of this standard. * * Therefore we set the HiSup bit here. * * The response format is 2, per SPC-3. */ inq_ptr->response_format = SID_HiSup | 2; inq_ptr->additional_length = data_len - (offsetof(struct scsi_inquiry_data, additional_length) + 1); CTL_DEBUG_PRINT(("additional_length = %d\n", inq_ptr->additional_length)); inq_ptr->spc3_flags = SPC3_SID_3PC | SPC3_SID_TPGS_IMPLICIT; /* 16 bit addressing */ if (port_type == CTL_PORT_SCSI) inq_ptr->spc2_flags = SPC2_SID_ADDR16; /* XXX set the SID_MultiP bit here if we're actually going to respond on multiple ports */ inq_ptr->spc2_flags |= SPC2_SID_MultiP; /* 16 bit data bus, synchronous transfers */ if (port_type == CTL_PORT_SCSI) inq_ptr->flags = SID_WBus16 | SID_Sync; /* * XXX KDM do we want to support tagged queueing on the control * device at all? */ if ((lun == NULL) || (lun->be_lun->lun_type != T_PROCESSOR)) inq_ptr->flags |= SID_CmdQue; /* * Per SPC-3, unused bytes in ASCII strings are filled with spaces. * We have 8 bytes for the vendor name, 16 bytes for the device * name, and 4 bytes for the revision. */ if (lun == NULL || (val = ctl_get_opt(&lun->be_lun->options, "vendor")) == NULL) { strncpy(inq_ptr->vendor, CTL_VENDOR, sizeof(inq_ptr->vendor)); } else { memset(inq_ptr->vendor, ' ', sizeof(inq_ptr->vendor)); strncpy(inq_ptr->vendor, val, min(sizeof(inq_ptr->vendor), strlen(val))); } if (lun == NULL) { strncpy(inq_ptr->product, CTL_DIRECT_PRODUCT, sizeof(inq_ptr->product)); } else if ((val = ctl_get_opt(&lun->be_lun->options, "product")) == NULL) { switch (lun->be_lun->lun_type) { case T_DIRECT: strncpy(inq_ptr->product, CTL_DIRECT_PRODUCT, sizeof(inq_ptr->product)); break; case T_PROCESSOR: strncpy(inq_ptr->product, CTL_PROCESSOR_PRODUCT, sizeof(inq_ptr->product)); break; default: strncpy(inq_ptr->product, CTL_UNKNOWN_PRODUCT, sizeof(inq_ptr->product)); break; } } else { memset(inq_ptr->product, ' ', sizeof(inq_ptr->product)); strncpy(inq_ptr->product, val, min(sizeof(inq_ptr->product), strlen(val))); } /* * XXX make this a macro somewhere so it automatically gets * incremented when we make changes.
*/ if (lun == NULL || (val = ctl_get_opt(&lun->be_lun->options, "revision")) == NULL) { strncpy(inq_ptr->revision, "0001", sizeof(inq_ptr->revision)); } else { memset(inq_ptr->revision, ' ', sizeof(inq_ptr->revision)); strncpy(inq_ptr->revision, val, min(sizeof(inq_ptr->revision), strlen(val))); } /* * For parallel SCSI, we support double transition and single * transition clocking. We also support QAS (Quick Arbitration * and Selection) and Information Unit transfers on both the * control and array devices. */ if (port_type == CTL_PORT_SCSI) inq_ptr->spi3data = SID_SPI_CLOCK_DT_ST | SID_SPI_QAS | SID_SPI_IUS; /* SAM-5 (no version claimed) */ scsi_ulto2b(0x00A0, inq_ptr->version1); /* SPC-4 (no version claimed) */ scsi_ulto2b(0x0460, inq_ptr->version2); if (port_type == CTL_PORT_FC) { /* FCP-2 ANSI INCITS.350:2003 */ scsi_ulto2b(0x0917, inq_ptr->version3); } else if (port_type == CTL_PORT_SCSI) { /* SPI-4 ANSI INCITS.362:200x */ scsi_ulto2b(0x0B56, inq_ptr->version3); } else if (port_type == CTL_PORT_ISCSI) { /* iSCSI (no version claimed) */ scsi_ulto2b(0x0960, inq_ptr->version3); } else if (port_type == CTL_PORT_SAS) { /* SAS (no version claimed) */ scsi_ulto2b(0x0BE0, inq_ptr->version3); } if (lun == NULL) { /* SBC-4 (no version claimed) */ scsi_ulto2b(0x0600, inq_ptr->version4); } else { switch (lun->be_lun->lun_type) { case T_DIRECT: /* SBC-4 (no version claimed) */ scsi_ulto2b(0x0600, inq_ptr->version4); break; case T_PROCESSOR: default: break; } } ctl_set_success(ctsio); ctsio->io_hdr.flags |= CTL_FLAG_ALLOCATED; ctsio->be_move_done = ctl_config_move_done; ctl_datamove((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } int ctl_inquiry(struct ctl_scsiio *ctsio) { struct scsi_inquiry *cdb; int retval; CTL_DEBUG_PRINT(("ctl_inquiry\n")); cdb = (struct scsi_inquiry *)ctsio->cdb; if (cdb->byte2 & SI_EVPD) retval = ctl_inquiry_evpd(ctsio); else if (cdb->page_code == 0) retval = ctl_inquiry_std(ctsio); else { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 2, /*bit_valid*/ 0, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); return (CTL_RETVAL_COMPLETE); } return (retval); } /* * For known CDB types, parse the LBA and length. 
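 *
 * Returns 0 when it recognizes the CDB and 1 otherwise. The
 * serialization code consumes it like this (cf. ctl_extent_check()
 * below):
 *
 *	if (ctl_get_lba_len(io, &lba, &len) != 0)
 *		return (CTL_ACTION_ERROR);
 *	... then compare [lba, lba + len) against the other I/O ...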
*/ static int ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len) { if (io->io_hdr.io_type != CTL_IO_SCSI) return (1); switch (io->scsiio.cdb[0]) { case COMPARE_AND_WRITE: { struct scsi_compare_and_write *cdb; cdb = (struct scsi_compare_and_write *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = cdb->length; break; } case READ_6: case WRITE_6: { struct scsi_rw_6 *cdb; cdb = (struct scsi_rw_6 *)io->scsiio.cdb; *lba = scsi_3btoul(cdb->addr); /* only 5 bits are valid in the most significant address byte */ *lba &= 0x1fffff; *len = cdb->length; break; } case READ_10: case WRITE_10: { struct scsi_rw_10 *cdb; cdb = (struct scsi_rw_10 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_2btoul(cdb->length); break; } case WRITE_VERIFY_10: { struct scsi_write_verify_10 *cdb; cdb = (struct scsi_write_verify_10 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_2btoul(cdb->length); break; } case READ_12: case WRITE_12: { struct scsi_rw_12 *cdb; cdb = (struct scsi_rw_12 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case WRITE_VERIFY_12: { struct scsi_write_verify_12 *cdb; cdb = (struct scsi_write_verify_12 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case READ_16: case WRITE_16: case WRITE_ATOMIC_16: { struct scsi_rw_16 *cdb; cdb = (struct scsi_rw_16 *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case WRITE_VERIFY_16: { struct scsi_write_verify_16 *cdb; cdb = (struct scsi_write_verify_16 *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case WRITE_SAME_10: { struct scsi_write_same_10 *cdb; cdb = (struct scsi_write_same_10 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_2btoul(cdb->length); break; } case WRITE_SAME_16: { struct scsi_write_same_16 *cdb; cdb = (struct scsi_write_same_16 *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case VERIFY_10: { struct scsi_verify_10 *cdb; cdb = (struct scsi_verify_10 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_2btoul(cdb->length); break; } case VERIFY_12: { struct scsi_verify_12 *cdb; cdb = (struct scsi_verify_12 *)io->scsiio.cdb; *lba = scsi_4btoul(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case VERIFY_16: { struct scsi_verify_16 *cdb; cdb = (struct scsi_verify_16 *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = scsi_4btoul(cdb->length); break; } case UNMAP: { *lba = 0; *len = UINT64_MAX; break; } case SERVICE_ACTION_IN: { /* GET LBA STATUS */ struct scsi_get_lba_status *cdb; cdb = (struct scsi_get_lba_status *)io->scsiio.cdb; *lba = scsi_8btou64(cdb->addr); *len = UINT32_MAX; break; } default: return (1); break; /* NOTREACHED */ } return (0); } static ctl_action ctl_extent_check_lba(uint64_t lba1, uint64_t len1, uint64_t lba2, uint64_t len2, bool seq) { uint64_t endlba1, endlba2; endlba1 = lba1 + len1 - (seq ? 0 : 1); endlba2 = lba2 + len2 - 1; if ((endlba1 < lba2) || (endlba2 < lba1)) return (CTL_ACTION_PASS); else return (CTL_ACTION_BLOCK); } static int ctl_extent_check_unmap(union ctl_io *io, uint64_t lba2, uint64_t len2) { struct ctl_ptr_len_flags *ptrlen; struct scsi_unmap_desc *buf, *end, *range; uint64_t lba; uint32_t len; /* If not UNMAP -- go other way. */ if (io->io_hdr.io_type != CTL_IO_SCSI || io->scsiio.cdb[0] != UNMAP) return (CTL_ACTION_ERROR); /* If UNMAP without data -- block and wait for data. 
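 *
 * Once the data has arrived, each unmap descriptor (lba, len) is
 * tested against the other command's range (lba2, len2) with the
 * usual half-open interval rule: overlap iff
 *
 *	lba < lba2 + len2  &&  lba + len > lba2
 *
 * e.g. (100, 8) vs. (104, 16) collide, while (100, 4) vs. (104, 16)
 * merely touch and pass.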
*/ ptrlen = (struct ctl_ptr_len_flags *) &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; if ((io->io_hdr.flags & CTL_FLAG_ALLOCATED) == 0 || ptrlen->ptr == NULL) return (CTL_ACTION_BLOCK); /* UNMAP with data -- check for collision. */ buf = (struct scsi_unmap_desc *)ptrlen->ptr; end = buf + ptrlen->len / sizeof(*buf); for (range = buf; range < end; range++) { lba = scsi_8btou64(range->lba); len = scsi_4btoul(range->length); if ((lba < lba2 + len2) && (lba + len > lba2)) return (CTL_ACTION_BLOCK); } return (CTL_ACTION_PASS); } static ctl_action ctl_extent_check(union ctl_io *io1, union ctl_io *io2, bool seq) { uint64_t lba1, lba2; uint64_t len1, len2; int retval; if (ctl_get_lba_len(io2, &lba2, &len2) != 0) return (CTL_ACTION_ERROR); retval = ctl_extent_check_unmap(io1, lba2, len2); if (retval != CTL_ACTION_ERROR) return (retval); if (ctl_get_lba_len(io1, &lba1, &len1) != 0) return (CTL_ACTION_ERROR); if (io1->io_hdr.flags & CTL_FLAG_SERSEQ_DONE) seq = FALSE; return (ctl_extent_check_lba(lba1, len1, lba2, len2, seq)); } static ctl_action ctl_extent_check_seq(union ctl_io *io1, union ctl_io *io2) { uint64_t lba1, lba2; uint64_t len1, len2; if (io1->io_hdr.flags & CTL_FLAG_SERSEQ_DONE) return (CTL_ACTION_PASS); if (ctl_get_lba_len(io1, &lba1, &len1) != 0) return (CTL_ACTION_ERROR); if (ctl_get_lba_len(io2, &lba2, &len2) != 0) return (CTL_ACTION_ERROR); if (lba1 + len1 == lba2) return (CTL_ACTION_BLOCK); return (CTL_ACTION_PASS); } static ctl_action ctl_check_for_blockage(struct ctl_lun *lun, union ctl_io *pending_io, union ctl_io *ooa_io) { const struct ctl_cmd_entry *pending_entry, *ooa_entry; ctl_serialize_action *serialize_row; /* * The initiator attempted multiple untagged commands at the same * time. Can't do that. */ if ((pending_io->scsiio.tag_type == CTL_TAG_UNTAGGED) && (ooa_io->scsiio.tag_type == CTL_TAG_UNTAGGED) && ((pending_io->io_hdr.nexus.targ_port == ooa_io->io_hdr.nexus.targ_port) && (pending_io->io_hdr.nexus.initid == ooa_io->io_hdr.nexus.initid)) && ((ooa_io->io_hdr.flags & (CTL_FLAG_ABORT | CTL_FLAG_STATUS_SENT)) == 0)) return (CTL_ACTION_OVERLAP); /* * The initiator attempted to send multiple tagged commands with * the same ID. (It's fine if different initiators have the same * tag ID.) * * Even if all of those conditions are true, we don't kill the I/O * if the command ahead of us has been aborted. We won't end up * sending it to the FETD, and it's perfectly legal to resend a * command with the same tag number as long as the previous * instance of this tag number has been aborted somehow. */ if ((pending_io->scsiio.tag_type != CTL_TAG_UNTAGGED) && (ooa_io->scsiio.tag_type != CTL_TAG_UNTAGGED) && (pending_io->scsiio.tag_num == ooa_io->scsiio.tag_num) && ((pending_io->io_hdr.nexus.targ_port == ooa_io->io_hdr.nexus.targ_port) && (pending_io->io_hdr.nexus.initid == ooa_io->io_hdr.nexus.initid)) && ((ooa_io->io_hdr.flags & (CTL_FLAG_ABORT | CTL_FLAG_STATUS_SENT)) == 0)) return (CTL_ACTION_OVERLAP_TAG); /* * If we get a head of queue tag, SAM-3 says that we should * immediately execute it. * * What happens if this command would normally block for some other * reason? e.g. a request sense with a head of queue tag * immediately after a write. Normally that would block, but this * will result in its getting executed immediately... * * We currently return "pass" instead of "skip", so we'll end up * going through the rest of the queue to check for overlapped tags. * * XXX KDM check for other types of blockage first?? 
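 *
 * In summary, the tag rules applied below: HEAD OF QUEUE passes
 * (executes immediately); ORDERED blocks behind anything ahead of
 * it; SIMPLE and untagged commands block only behind head-of-queue
 * and ordered tags, then fall through to the per-opcode
 * serialization table.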
*/ if (pending_io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE) return (CTL_ACTION_PASS); /* * Ordered tags have to block until all items ahead of them * have completed. If we get called with an ordered tag, we always * block, if something else is ahead of us in the queue. */ if (pending_io->scsiio.tag_type == CTL_TAG_ORDERED) return (CTL_ACTION_BLOCK); /* * Simple tags get blocked until all head of queue and ordered tags * ahead of them have completed. I'm lumping untagged commands in * with simple tags here. XXX KDM is that the right thing to do? */ if (((pending_io->scsiio.tag_type == CTL_TAG_UNTAGGED) || (pending_io->scsiio.tag_type == CTL_TAG_SIMPLE)) && ((ooa_io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE) || (ooa_io->scsiio.tag_type == CTL_TAG_ORDERED))) return (CTL_ACTION_BLOCK); pending_entry = ctl_get_cmd_entry(&pending_io->scsiio, NULL); ooa_entry = ctl_get_cmd_entry(&ooa_io->scsiio, NULL); serialize_row = ctl_serialize_table[ooa_entry->seridx]; switch (serialize_row[pending_entry->seridx]) { case CTL_SER_BLOCK: return (CTL_ACTION_BLOCK); case CTL_SER_EXTENT: return (ctl_extent_check(ooa_io, pending_io, (lun->be_lun && lun->be_lun->serseq == CTL_LUN_SERSEQ_ON))); case CTL_SER_EXTENTOPT: if ((lun->mode_pages.control_page[CTL_PAGE_CURRENT].queue_flags & SCP_QUEUE_ALG_MASK) != SCP_QUEUE_ALG_UNRESTRICTED) return (ctl_extent_check(ooa_io, pending_io, (lun->be_lun && lun->be_lun->serseq == CTL_LUN_SERSEQ_ON))); return (CTL_ACTION_PASS); case CTL_SER_EXTENTSEQ: if (lun->be_lun && lun->be_lun->serseq != CTL_LUN_SERSEQ_OFF) return (ctl_extent_check_seq(ooa_io, pending_io)); return (CTL_ACTION_PASS); case CTL_SER_PASS: return (CTL_ACTION_PASS); case CTL_SER_BLOCKOPT: if ((lun->mode_pages.control_page[CTL_PAGE_CURRENT].queue_flags & SCP_QUEUE_ALG_MASK) != SCP_QUEUE_ALG_UNRESTRICTED) return (CTL_ACTION_BLOCK); return (CTL_ACTION_PASS); case CTL_SER_SKIP: return (CTL_ACTION_SKIP); default: panic("invalid serialization value %d", serialize_row[pending_entry->seridx]); } return (CTL_ACTION_ERROR); } /* * Check for blockage or overlaps against the OOA (Order Of Arrival) queue. * Assumptions: * - pending_io is generally either incoming, or on the blocked queue * - starting I/O is the I/O we want to start the check with. */ static ctl_action ctl_check_ooa(struct ctl_lun *lun, union ctl_io *pending_io, union ctl_io *starting_io) { union ctl_io *ooa_io; ctl_action action; mtx_assert(&lun->lun_lock, MA_OWNED); /* * Run back along the OOA queue, starting with the current * blocked I/O and going through every I/O before it on the * queue. If starting_io is NULL, we'll just end up returning * CTL_ACTION_PASS. */ for (ooa_io = starting_io; ooa_io != NULL; ooa_io = (union ctl_io *)TAILQ_PREV(&ooa_io->io_hdr, ctl_ooaq, ooa_links)){ /* * This routine just checks to see whether * cur_blocked is blocked by ooa_io, which is ahead * of it in the queue. It doesn't queue/dequeue * cur_blocked. */ action = ctl_check_for_blockage(lun, pending_io, ooa_io); switch (action) { case CTL_ACTION_BLOCK: case CTL_ACTION_OVERLAP: case CTL_ACTION_OVERLAP_TAG: case CTL_ACTION_SKIP: case CTL_ACTION_ERROR: return (action); break; /* NOTREACHED */ case CTL_ACTION_PASS: break; default: panic("invalid action %d", action); break; /* NOTREACHED */ } } return (CTL_ACTION_PASS); } /* * Assumptions: * - An I/O has just completed, and has been removed from the per-LUN OOA * queue, so some items on the blocked queue may now be unblocked. 
*/ static int ctl_check_blocked(struct ctl_lun *lun) { struct ctl_softc *softc = lun->ctl_softc; union ctl_io *cur_blocked, *next_blocked; mtx_assert(&lun->lun_lock, MA_OWNED); /* * Run forward from the head of the blocked queue, checking each * entry against the I/Os prior to it on the OOA queue to see if * there is still any blockage. * * We cannot use the TAILQ_FOREACH() macro, because it can't deal * with our removing a variable on it while it is traversing the * list. */ for (cur_blocked = (union ctl_io *)TAILQ_FIRST(&lun->blocked_queue); cur_blocked != NULL; cur_blocked = next_blocked) { union ctl_io *prev_ooa; ctl_action action; next_blocked = (union ctl_io *)TAILQ_NEXT(&cur_blocked->io_hdr, blocked_links); prev_ooa = (union ctl_io *)TAILQ_PREV(&cur_blocked->io_hdr, ctl_ooaq, ooa_links); /* * If cur_blocked happens to be the first item in the OOA * queue now, prev_ooa will be NULL, and the action * returned will just be CTL_ACTION_PASS. */ action = ctl_check_ooa(lun, cur_blocked, prev_ooa); switch (action) { case CTL_ACTION_BLOCK: /* Nothing to do here, still blocked */ break; case CTL_ACTION_OVERLAP: case CTL_ACTION_OVERLAP_TAG: /* * This shouldn't happen! In theory we've already * checked this command for overlap... */ break; case CTL_ACTION_PASS: case CTL_ACTION_SKIP: { const struct ctl_cmd_entry *entry; /* * The skip case shouldn't happen, this transaction * should have never made it onto the blocked queue. */ /* * This I/O is no longer blocked, we can remove it * from the blocked queue. Since this is a TAILQ * (doubly linked list), we can do O(1) removals * from any place on the list. */ TAILQ_REMOVE(&lun->blocked_queue, &cur_blocked->io_hdr, blocked_links); cur_blocked->io_hdr.flags &= ~CTL_FLAG_BLOCKED; if ((softc->ha_mode != CTL_HA_MODE_XFER) && (cur_blocked->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)){ /* * Need to send IO back to original side to * run */ union ctl_ha_msg msg_info; cur_blocked->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE; msg_info.hdr.original_sc = cur_blocked->io_hdr.original_sc; msg_info.hdr.serializing_sc = cur_blocked; msg_info.hdr.msg_type = CTL_MSG_R2R; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.hdr), M_NOWAIT); break; } entry = ctl_get_cmd_entry(&cur_blocked->scsiio, NULL); /* * Check this I/O for LUN state changes that may * have happened while this command was blocked. * The LUN state may have been changed by a command * ahead of us in the queue, so we need to re-check * for any states that can be caused by SCSI * commands. */ if (ctl_scsiio_lun_check(lun, entry, &cur_blocked->scsiio) == 0) { cur_blocked->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; ctl_enqueue_rtr(cur_blocked); } else ctl_done(cur_blocked); break; } default: /* * This probably shouldn't happen -- we shouldn't * get CTL_ACTION_ERROR, or anything else. */ break; } } return (CTL_RETVAL_COMPLETE); } /* * This routine (with one exception) checks LUN flags that can be set by * commands ahead of us in the OOA queue. These flags have to be checked * when a command initially comes in, and when we pull a command off the * blocked queue and are preparing to execute it. The reason we have to * check these flags for commands on the blocked queue is that the LUN * state may have been changed by a command ahead of us while we're on the * blocked queue. * * Ordering is somewhat important with these checks, so please pay * careful attention to the placement of any new checks. 
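 *
 * For orientation, the order of the checks below is roughly:
 *	1. HA availability (LUN unavailable, transitioning, standby)
 *	2. Write protection (hardware read-only flag, then the SWP bit
 *	   in the control mode page)
 *	3. RESERVE/RELEASE reservation conflicts
 *	4. Persistent reservation conflicts
 *	5. LUN state (offline, stopped, inoperable)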
*/ static int ctl_scsiio_lun_check(struct ctl_lun *lun, const struct ctl_cmd_entry *entry, struct ctl_scsiio *ctsio) { struct ctl_softc *softc = lun->ctl_softc; int retval; uint32_t residx; retval = 0; mtx_assert(&lun->lun_lock, MA_OWNED); /* * If this shelf is a secondary shelf controller, we may have to * reject some commands disallowed by HA mode and link state. */ if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0) { if (softc->ha_link == CTL_HA_LINK_OFFLINE && (entry->flags & CTL_CMD_FLAG_OK_ON_UNAVAIL) == 0) { ctl_set_lun_unavail(ctsio); retval = 1; goto bailout; } if ((lun->flags & CTL_LUN_PEER_SC_PRIMARY) == 0 && (entry->flags & CTL_CMD_FLAG_OK_ON_UNAVAIL) == 0) { ctl_set_lun_transit(ctsio); retval = 1; goto bailout; } if (softc->ha_mode == CTL_HA_MODE_ACT_STBY && (entry->flags & CTL_CMD_FLAG_OK_ON_STANDBY) == 0) { ctl_set_lun_standby(ctsio); retval = 1; goto bailout; } /* The rest of checks are only done on executing side */ if (softc->ha_mode == CTL_HA_MODE_XFER) goto bailout; } if (entry->pattern & CTL_LUN_PAT_WRITE) { if (lun->be_lun && lun->be_lun->flags & CTL_LUN_FLAG_READONLY) { ctl_set_hw_write_protected(ctsio); retval = 1; goto bailout; } if ((lun->mode_pages.control_page[CTL_PAGE_CURRENT] .eca_and_aen & SCP_SWP) != 0) { ctl_set_sense(ctsio, /*current_error*/ 1, /*sense_key*/ SSD_KEY_DATA_PROTECT, /*asc*/ 0x27, /*ascq*/ 0x02, SSD_ELEM_NONE); retval = 1; goto bailout; } } /* * Check for a reservation conflict. If this command isn't allowed * even on reserved LUNs, and if this initiator isn't the one who * reserved us, reject the command with a reservation conflict. */ residx = ctl_get_initindex(&ctsio->io_hdr.nexus); if ((lun->flags & CTL_LUN_RESERVED) && ((entry->flags & CTL_CMD_FLAG_ALLOW_ON_RESV) == 0)) { if (lun->res_idx != residx) { ctl_set_reservation_conflict(ctsio); retval = 1; goto bailout; } } if ((lun->flags & CTL_LUN_PR_RESERVED) == 0 || (entry->flags & CTL_CMD_FLAG_ALLOW_ON_PR_RESV)) { /* No reservation or command is allowed. */; } else if ((entry->flags & CTL_CMD_FLAG_ALLOW_ON_PR_WRESV) && (lun->res_type == SPR_TYPE_WR_EX || lun->res_type == SPR_TYPE_WR_EX_RO || lun->res_type == SPR_TYPE_WR_EX_AR)) { /* The command is allowed for Write Exclusive resv. */; } else { /* * if we aren't registered or it's a res holder type * reservation and this isn't the res holder then set a * conflict. */ if (ctl_get_prkey(lun, residx) == 0 || (residx != lun->pr_res_idx && lun->res_type < 4)) { ctl_set_reservation_conflict(ctsio); retval = 1; goto bailout; } } if ((lun->flags & CTL_LUN_OFFLINE) && ((entry->flags & CTL_CMD_FLAG_OK_ON_STANDBY) == 0)) { ctl_set_lun_not_ready(ctsio); retval = 1; goto bailout; } if ((lun->flags & CTL_LUN_STOPPED) && ((entry->flags & CTL_CMD_FLAG_OK_ON_STOPPED) == 0)) { /* "Logical unit not ready, initializing cmd. 
required" */ ctl_set_lun_stopped(ctsio); retval = 1; goto bailout; } if ((lun->flags & CTL_LUN_INOPERABLE) && ((entry->flags & CTL_CMD_FLAG_OK_ON_INOPERABLE) == 0)) { /* "Medium format corrupted" */ ctl_set_medium_format_corrupted(ctsio); retval = 1; goto bailout; } bailout: return (retval); } static void ctl_failover_io(union ctl_io *io, int have_lock) { ctl_set_busy(&io->scsiio); ctl_done(io); } static void ctl_failover_lun(struct ctl_lun *lun) { struct ctl_softc *softc = lun->ctl_softc; struct ctl_io_hdr *io, *next_io; CTL_DEBUG_PRINT(("FAILOVER for lun %ju\n", lun->lun)); if (softc->ha_mode == CTL_HA_MODE_XFER) { TAILQ_FOREACH_SAFE(io, &lun->ooa_queue, ooa_links, next_io) { /* We are master */ if (io->flags & CTL_FLAG_FROM_OTHER_SC) { if (io->flags & CTL_FLAG_IO_ACTIVE) { io->flags |= CTL_FLAG_ABORT; io->flags |= CTL_FLAG_FAILOVER; } else { /* This can be only due to DATAMOVE */ io->msg_type = CTL_MSG_DATAMOVE_DONE; io->flags &= ~CTL_FLAG_DMA_INPROG; io->flags |= CTL_FLAG_IO_ACTIVE; io->port_status = 31340; ctl_enqueue_isc((union ctl_io *)io); } } /* We are slave */ if (io->flags & CTL_FLAG_SENT_2OTHER_SC) { io->flags &= ~CTL_FLAG_SENT_2OTHER_SC; if (io->flags & CTL_FLAG_IO_ACTIVE) { io->flags |= CTL_FLAG_FAILOVER; } else { ctl_set_busy(&((union ctl_io *)io)-> scsiio); ctl_done((union ctl_io *)io); } } } } else { /* SERIALIZE modes */ TAILQ_FOREACH_SAFE(io, &lun->blocked_queue, blocked_links, next_io) { /* We are master */ if (io->flags & CTL_FLAG_FROM_OTHER_SC) { TAILQ_REMOVE(&lun->blocked_queue, io, blocked_links); io->flags &= ~CTL_FLAG_BLOCKED; TAILQ_REMOVE(&lun->ooa_queue, io, ooa_links); ctl_free_io((union ctl_io *)io); } } TAILQ_FOREACH_SAFE(io, &lun->ooa_queue, ooa_links, next_io) { /* We are master */ if (io->flags & CTL_FLAG_FROM_OTHER_SC) { TAILQ_REMOVE(&lun->ooa_queue, io, ooa_links); ctl_free_io((union ctl_io *)io); } /* We are slave */ if (io->flags & CTL_FLAG_SENT_2OTHER_SC) { io->flags &= ~CTL_FLAG_SENT_2OTHER_SC; if (!(io->flags & CTL_FLAG_IO_ACTIVE)) { ctl_set_busy(&((union ctl_io *)io)-> scsiio); ctl_done((union ctl_io *)io); } } } ctl_check_blocked(lun); } } static int ctl_scsiio_precheck(struct ctl_softc *softc, struct ctl_scsiio *ctsio) { struct ctl_lun *lun; const struct ctl_cmd_entry *entry; uint32_t initidx, targ_lun; int retval; retval = 0; lun = NULL; targ_lun = ctsio->io_hdr.nexus.targ_mapped_lun; if ((targ_lun < CTL_MAX_LUNS) && ((lun = softc->ctl_luns[targ_lun]) != NULL)) { /* * If the LUN is invalid, pretend that it doesn't exist. * It will go away as soon as all pending I/O has been * completed. */ mtx_lock(&lun->lun_lock); if (lun->flags & CTL_LUN_DISABLED) { mtx_unlock(&lun->lun_lock); lun = NULL; ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr = NULL; ctsio->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptr = NULL; } else { ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr = lun; ctsio->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptr = lun->be_lun; /* * Every I/O goes into the OOA queue for a * particular LUN, and stays there until completion. */ #ifdef CTL_TIME_IO if (TAILQ_EMPTY(&lun->ooa_queue)) { lun->idle_time += getsbinuptime() - lun->last_busy; } #endif TAILQ_INSERT_TAIL(&lun->ooa_queue, &ctsio->io_hdr, ooa_links); } } else { ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr = NULL; ctsio->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptr = NULL; } /* Get command entry and return error if it is unsuppotyed. 
*/ entry = ctl_validate_command(ctsio); if (entry == NULL) { if (lun) mtx_unlock(&lun->lun_lock); return (retval); } ctsio->io_hdr.flags &= ~CTL_FLAG_DATA_MASK; ctsio->io_hdr.flags |= entry->flags & CTL_FLAG_DATA_MASK; /* * Check to see whether we can send this command to LUNs that don't * exist. This should pretty much only be the case for inquiry * and request sense. Further checks, below, really require having * a LUN, so we can't really check the command anymore. Just put * it on the rtr queue. */ if (lun == NULL) { if (entry->flags & CTL_CMD_FLAG_OK_ON_NO_LUN) { ctsio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; ctl_enqueue_rtr((union ctl_io *)ctsio); return (retval); } ctl_set_unsupported_lun(ctsio); ctl_done((union ctl_io *)ctsio); CTL_DEBUG_PRINT(("ctl_scsiio_precheck: bailing out due to invalid LUN\n")); return (retval); } else { /* * Make sure we support this particular command on this LUN. * e.g., we don't support writes to the control LUN. */ if (!ctl_cmd_applicable(lun->be_lun->lun_type, entry)) { mtx_unlock(&lun->lun_lock); ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (retval); } } initidx = ctl_get_initindex(&ctsio->io_hdr.nexus); #ifdef CTL_WITH_CA /* * If we've got a request sense, it'll clear the contingent * allegiance condition. Otherwise, if we have a CA condition for * this initiator, clear it, because it sent down a command other * than request sense. */ if ((ctsio->cdb[0] != REQUEST_SENSE) && (ctl_is_set(lun->have_ca, initidx))) ctl_clear_mask(lun->have_ca, initidx); #endif /* * If the command has this flag set, it handles its own unit * attention reporting, we shouldn't do anything. Otherwise we * check for any pending unit attentions, and send them back to the * initiator. We only do this when a command initially comes in, * not when we pull it off the blocked queue. * * According to SAM-3, section 5.3.2, the order that things get * presented back to the host is basically unit attentions caused * by some sort of reset event, busy status, reservation conflicts * or task set full, and finally any other status. * * One issue here is that some of the unit attentions we report * don't fall into the "reset" category (e.g. "reported luns data * has changed"). So reporting it here, before the reservation * check, may be technically wrong. I guess the only thing to do * would be to check for and report the reset events here, and then * check for the other unit attention types after we check for a * reservation conflict. * * XXX KDM need to fix this */ if ((entry->flags & CTL_CMD_FLAG_NO_SENSE) == 0) { ctl_ua_type ua_type; ua_type = ctl_build_ua(lun, initidx, &ctsio->sense_data, SSD_TYPE_NONE); if (ua_type != CTL_UA_NONE) { mtx_unlock(&lun->lun_lock); ctsio->scsi_status = SCSI_STATUS_CHECK_COND; ctsio->io_hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE; ctsio->sense_len = SSD_FULL_SIZE; ctl_done((union ctl_io *)ctsio); return (retval); } } if (ctl_scsiio_lun_check(lun, entry, ctsio) != 0) { mtx_unlock(&lun->lun_lock); ctl_done((union ctl_io *)ctsio); return (retval); } /* * XXX CHD this is where we want to send IO to other side if * this LUN is secondary on this SC. We will need to make a copy * of the IO and flag the IO on this side as SENT_2OTHER and the flag * the copy we send as FROM_OTHER. * We also need to stuff the address of the original IO so we can * find it easily. Something similar will need be done on the other * side so when we are done we can find the copy. 
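 *
 * The pairing mechanism is visible in the message header fields used
 * below: the sender stashes a pointer to its own I/O in original_sc,
 * and the peer echoes it back while adding its own serializing_sc, so
 * each side can locate its copy when the answer arrives. A sketch of
 * the outbound half (matching the code that follows):
 *
 *	msg_info.hdr.original_sc = (union ctl_io *)ctsio;  // find me later
 *	msg_info.hdr.serializing_sc = NULL;	// peer fills this in
 *	ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, ..., M_WAITOK);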
*/ if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 && (lun->flags & CTL_LUN_PEER_SC_PRIMARY) != 0 && (entry->flags & CTL_CMD_FLAG_RUN_HERE) == 0) { union ctl_ha_msg msg_info; int isc_retval; ctsio->io_hdr.flags |= CTL_FLAG_SENT_2OTHER_SC; ctsio->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE; mtx_unlock(&lun->lun_lock); msg_info.hdr.msg_type = CTL_MSG_SERIALIZE; msg_info.hdr.original_sc = (union ctl_io *)ctsio; msg_info.hdr.serializing_sc = NULL; msg_info.hdr.nexus = ctsio->io_hdr.nexus; msg_info.scsi.tag_num = ctsio->tag_num; msg_info.scsi.tag_type = ctsio->tag_type; msg_info.scsi.cdb_len = ctsio->cdb_len; memcpy(msg_info.scsi.cdb, ctsio->cdb, CTL_MAX_CDBLEN); if ((isc_retval = ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.scsi) - sizeof(msg_info.scsi.sense_data), M_WAITOK)) > CTL_HA_STATUS_SUCCESS) { ctl_set_busy(ctsio); ctl_done((union ctl_io *)ctsio); return (retval); } return (retval); } switch (ctl_check_ooa(lun, (union ctl_io *)ctsio, (union ctl_io *)TAILQ_PREV(&ctsio->io_hdr, ctl_ooaq, ooa_links))) { case CTL_ACTION_BLOCK: ctsio->io_hdr.flags |= CTL_FLAG_BLOCKED; TAILQ_INSERT_TAIL(&lun->blocked_queue, &ctsio->io_hdr, blocked_links); mtx_unlock(&lun->lun_lock); return (retval); case CTL_ACTION_PASS: case CTL_ACTION_SKIP: ctsio->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; mtx_unlock(&lun->lun_lock); ctl_enqueue_rtr((union ctl_io *)ctsio); break; case CTL_ACTION_OVERLAP: mtx_unlock(&lun->lun_lock); ctl_set_overlapped_cmd(ctsio); ctl_done((union ctl_io *)ctsio); break; case CTL_ACTION_OVERLAP_TAG: mtx_unlock(&lun->lun_lock); ctl_set_overlapped_tag(ctsio, ctsio->tag_num & 0xff); ctl_done((union ctl_io *)ctsio); break; case CTL_ACTION_ERROR: default: mtx_unlock(&lun->lun_lock); ctl_set_internal_failure(ctsio, /*sks_valid*/ 0, /*retry_count*/ 0); ctl_done((union ctl_io *)ctsio); break; } return (retval); } const struct ctl_cmd_entry * ctl_get_cmd_entry(struct ctl_scsiio *ctsio, int *sa) { const struct ctl_cmd_entry *entry; int service_action; entry = &ctl_cmd_table[ctsio->cdb[0]]; if (sa) *sa = ((entry->flags & CTL_CMD_FLAG_SA5) != 0); if (entry->flags & CTL_CMD_FLAG_SA5) { service_action = ctsio->cdb[1] & SERVICE_ACTION_MASK; entry = &((const struct ctl_cmd_entry *) entry->execute)[service_action]; } return (entry); } const struct ctl_cmd_entry * ctl_validate_command(struct ctl_scsiio *ctsio) { const struct ctl_cmd_entry *entry; int i, sa; uint8_t diff; entry = ctl_get_cmd_entry(ctsio, &sa); if (entry->execute == NULL) { if (sa) ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 4); else ctl_set_invalid_opcode(ctsio); ctl_done((union ctl_io *)ctsio); return (NULL); } KASSERT(entry->length > 0, ("Not defined length for command 0x%02x/0x%02x", ctsio->cdb[0], ctsio->cdb[1])); for (i = 1; i < entry->length; i++) { diff = ctsio->cdb[i] & ~entry->usage[i - 1]; if (diff == 0) continue; ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ i, /*bit_valid*/ 1, /*bit*/ fls(diff) - 1); ctl_done((union ctl_io *)ctsio); return (NULL); } return (entry); } static int ctl_cmd_applicable(uint8_t lun_type, const struct ctl_cmd_entry *entry) { switch (lun_type) { case T_PROCESSOR: if ((entry->flags & CTL_CMD_FLAG_OK_ON_PROC) == 0) return (0); break; case T_DIRECT: if ((entry->flags & CTL_CMD_FLAG_OK_ON_SLUN) == 0) return (0); break; default: return (0); } return (1); } static int ctl_scsiio(struct ctl_scsiio *ctsio) { int retval; const struct ctl_cmd_entry *entry; retval = CTL_RETVAL_COMPLETE; CTL_DEBUG_PRINT(("ctl_scsiio cdb[0]=%02X\n", 
ctsio->cdb[0])); entry = ctl_get_cmd_entry(ctsio, NULL); /* * If this I/O has been aborted, just send it straight to * ctl_done() without executing it. */ if (ctsio->io_hdr.flags & CTL_FLAG_ABORT) { ctl_done((union ctl_io *)ctsio); goto bailout; } /* * All the checks should have been handled by ctl_scsiio_precheck(). * We should be clear now to just execute the I/O. */ retval = entry->execute(ctsio); bailout: return (retval); } /* * Since we only implement one target right now, a bus reset simply resets * our single target. */ static int ctl_bus_reset(struct ctl_softc *softc, union ctl_io *io) { return(ctl_target_reset(softc, io, CTL_UA_BUS_RESET)); } static int ctl_target_reset(struct ctl_softc *softc, union ctl_io *io, ctl_ua_type ua_type) { struct ctl_port *port; struct ctl_lun *lun; int retval; if (!(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) { union ctl_ha_msg msg_info; msg_info.hdr.nexus = io->io_hdr.nexus; if (ua_type==CTL_UA_TARG_RESET) msg_info.task.task_action = CTL_TASK_TARGET_RESET; else msg_info.task.task_action = CTL_TASK_BUS_RESET; msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS; msg_info.hdr.original_sc = NULL; msg_info.hdr.serializing_sc = NULL; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.task), M_WAITOK); } retval = 0; mtx_lock(&softc->ctl_lock); port = softc->ctl_ports[io->io_hdr.nexus.targ_port]; STAILQ_FOREACH(lun, &softc->lun_list, links) { if (port != NULL && ctl_lun_map_to_port(port, lun->lun) >= CTL_MAX_LUNS) continue; retval += ctl_do_lun_reset(lun, io, ua_type); } mtx_unlock(&softc->ctl_lock); io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE; return (retval); } /* * The LUN should always be set. The I/O is optional, and is used to * distinguish between I/Os sent by this initiator, and by other * initiators. We set unit attention for initiators other than this one. * SAM-3 is vague on this point. It does say that a unit attention should * be established for other initiators when a LUN is reset (see section * 5.7.3), but it doesn't specifically say that the unit attention should * be established for this particular initiator when a LUN is reset. Here * is the relevant text, from SAM-3 rev 8: * * 5.7.2 When a SCSI initiator port aborts its own tasks * * When a SCSI initiator port causes its own task(s) to be aborted, no * notification that the task(s) have been aborted shall be returned to * the SCSI initiator port other than the completion response for the * command or task management function action that caused the task(s) to * be aborted and notification(s) associated with related effects of the * action (e.g., a reset unit attention condition). * * XXX KDM for now, we're setting unit attention for all initiators. */ static int ctl_do_lun_reset(struct ctl_lun *lun, union ctl_io *io, ctl_ua_type ua_type) { union ctl_io *xio; #if 0 uint32_t initidx; #endif #ifdef CTL_WITH_CA int i; #endif mtx_lock(&lun->lun_lock); /* * Run through the OOA queue and abort each I/O. */ for (xio = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); xio != NULL; xio = (union ctl_io *)TAILQ_NEXT(&xio->io_hdr, ooa_links)) { xio->io_hdr.flags |= CTL_FLAG_ABORT | CTL_FLAG_ABORT_STATUS; } /* * This version sets unit attention for every */ #if 0 initidx = ctl_get_initindex(&io->io_hdr.nexus); ctl_est_ua_all(lun, initidx, ua_type); #else ctl_est_ua_all(lun, -1, ua_type); #endif /* * A reset (any kind, really) clears reservations established with * RESERVE/RELEASE. 
It does not clear reservations established * with PERSISTENT RESERVE OUT, but we don't support that at the * moment anyway. See SPC-2, section 5.6. SPC-3 doesn't address * reservations made with the RESERVE/RELEASE commands, because * those commands are obsolete in SPC-3. */ lun->flags &= ~CTL_LUN_RESERVED; #ifdef CTL_WITH_CA for (i = 0; i < CTL_MAX_INITIATORS; i++) ctl_clear_mask(lun->have_ca, i); #endif mtx_unlock(&lun->lun_lock); return (0); } static int ctl_lun_reset(struct ctl_softc *softc, union ctl_io *io) { struct ctl_lun *lun; uint32_t targ_lun; int retval; targ_lun = io->io_hdr.nexus.targ_mapped_lun; mtx_lock(&softc->ctl_lock); if ((targ_lun >= CTL_MAX_LUNS) || (lun = softc->ctl_luns[targ_lun]) == NULL) { mtx_unlock(&softc->ctl_lock); io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST; return (1); } retval = ctl_do_lun_reset(lun, io, CTL_UA_LUN_RESET); mtx_unlock(&softc->ctl_lock); io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE; if ((io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) == 0) { union ctl_ha_msg msg_info; msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS; msg_info.hdr.nexus = io->io_hdr.nexus; msg_info.task.task_action = CTL_TASK_LUN_RESET; msg_info.hdr.original_sc = NULL; msg_info.hdr.serializing_sc = NULL; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.task), M_WAITOK); } return (retval); } static void ctl_abort_tasks_lun(struct ctl_lun *lun, uint32_t targ_port, uint32_t init_id, int other_sc) { union ctl_io *xio; mtx_assert(&lun->lun_lock, MA_OWNED); /* * Run through the OOA queue and attempt to find the given I/O. * The target port, initiator ID, tag type and tag number have to * match the values that we got from the initiator. If we have an * untagged command to abort, simply abort the first untagged command * we come to. We only allow one untagged command at a time of course. */ for (xio = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); xio != NULL; xio = (union ctl_io *)TAILQ_NEXT(&xio->io_hdr, ooa_links)) { if ((targ_port == UINT32_MAX || targ_port == xio->io_hdr.nexus.targ_port) && (init_id == UINT32_MAX || init_id == xio->io_hdr.nexus.initid)) { if (targ_port != xio->io_hdr.nexus.targ_port || init_id != xio->io_hdr.nexus.initid) xio->io_hdr.flags |= CTL_FLAG_ABORT_STATUS; xio->io_hdr.flags |= CTL_FLAG_ABORT; if (!other_sc && !(lun->flags & CTL_LUN_PRIMARY_SC)) { union ctl_ha_msg msg_info; msg_info.hdr.nexus = xio->io_hdr.nexus; msg_info.task.task_action = CTL_TASK_ABORT_TASK; msg_info.task.tag_num = xio->scsiio.tag_num; msg_info.task.tag_type = xio->scsiio.tag_type; msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS; msg_info.hdr.original_sc = NULL; msg_info.hdr.serializing_sc = NULL; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.task), M_NOWAIT); } } } } static int ctl_abort_task_set(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_lun *lun; uint32_t targ_lun; /* * Look up the LUN. 
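 *
 * Note that ctl_abort_tasks_lun() above treats UINT32_MAX as a
 * wildcard; a minimal sketch of that matching rule (hypothetical
 * helper, not in CTL):
 *
 *	static int
 *	nexus_matches(uint32_t want_port, uint32_t want_init,
 *	    uint32_t port, uint32_t init)
 *	{
 *		return ((want_port == UINT32_MAX || want_port == port) &&
 *		    (want_init == UINT32_MAX || want_init == init));
 *	}
 *
 * ABORT TASK SET passes the specific nexus, while CLEAR TASK SET
 * passes two wildcards and so sweeps every command on the LUN.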
*/ targ_lun = io->io_hdr.nexus.targ_mapped_lun; mtx_lock(&softc->ctl_lock); if ((targ_lun >= CTL_MAX_LUNS) || (lun = softc->ctl_luns[targ_lun]) == NULL) { mtx_unlock(&softc->ctl_lock); io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST; return (1); } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); if (io->taskio.task_action == CTL_TASK_ABORT_TASK_SET) { ctl_abort_tasks_lun(lun, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.initid, (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0); } else { /* CTL_TASK_CLEAR_TASK_SET */ ctl_abort_tasks_lun(lun, UINT32_MAX, UINT32_MAX, (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) != 0); } mtx_unlock(&lun->lun_lock); io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE; return (0); } static int ctl_i_t_nexus_reset(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_lun *lun; uint32_t initidx; if (!(io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) { union ctl_ha_msg msg_info; msg_info.hdr.nexus = io->io_hdr.nexus; msg_info.task.task_action = CTL_TASK_I_T_NEXUS_RESET; msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS; msg_info.hdr.original_sc = NULL; msg_info.hdr.serializing_sc = NULL; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.task), M_WAITOK); } initidx = ctl_get_initindex(&io->io_hdr.nexus); mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(lun, &softc->lun_list, links) { mtx_lock(&lun->lun_lock); ctl_abort_tasks_lun(lun, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.initid, 1); #ifdef CTL_WITH_CA ctl_clear_mask(lun->have_ca, initidx); #endif if ((lun->flags & CTL_LUN_RESERVED) && (lun->res_idx == initidx)) lun->flags &= ~CTL_LUN_RESERVED; ctl_est_ua(lun, initidx, CTL_UA_I_T_NEXUS_LOSS); mtx_unlock(&lun->lun_lock); } mtx_unlock(&softc->ctl_lock); io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE; return (0); } static int ctl_abort_task(union ctl_io *io) { union ctl_io *xio; struct ctl_lun *lun; struct ctl_softc *softc; #if 0 struct sbuf sb; char printbuf[128]; #endif int found; uint32_t targ_lun; softc = control_softc; found = 0; /* * Look up the LUN. */ targ_lun = io->io_hdr.nexus.targ_mapped_lun; mtx_lock(&softc->ctl_lock); if ((targ_lun >= CTL_MAX_LUNS) || (lun = softc->ctl_luns[targ_lun]) == NULL) { mtx_unlock(&softc->ctl_lock); io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST; return (1); } #if 0 printf("ctl_abort_task: called for lun %lld, tag %d type %d\n", lun->lun, io->taskio.tag_num, io->taskio.tag_type); #endif mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); /* * Run through the OOA queue and attempt to find the given I/O. * The target port, initiator ID, tag type and tag number have to * match the values that we got from the initiator. If we have an * untagged command to abort, simply abort the first untagged command * we come to. We only allow one untagged command at a time of course. */ for (xio = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); xio != NULL; xio = (union ctl_io *)TAILQ_NEXT(&xio->io_hdr, ooa_links)) { #if 0 sbuf_new(&sb, printbuf, sizeof(printbuf), SBUF_FIXEDLEN); sbuf_printf(&sb, "LUN %lld tag %d type %d%s%s%s%s: ", lun->lun, xio->scsiio.tag_num, xio->scsiio.tag_type, (xio->io_hdr.blocked_links.tqe_prev == NULL) ? "" : " BLOCKED", (xio->io_hdr.flags & CTL_FLAG_DMA_INPROG) ? " DMA" : "", (xio->io_hdr.flags & CTL_FLAG_ABORT) ? " ABORT" : "", (xio->io_hdr.flags & CTL_FLAG_IS_WAS_ON_RTR ? 
" RTR" : "")); ctl_scsi_command_string(&xio->scsiio, NULL, &sb); sbuf_finish(&sb); printf("%s\n", sbuf_data(&sb)); #endif if ((xio->io_hdr.nexus.targ_port != io->io_hdr.nexus.targ_port) || (xio->io_hdr.nexus.initid != io->io_hdr.nexus.initid) || (xio->io_hdr.flags & CTL_FLAG_ABORT)) continue; /* * If the abort says that the task is untagged, the * task in the queue must be untagged. Otherwise, * we just check to see whether the tag numbers * match. This is because the QLogic firmware * doesn't pass back the tag type in an abort * request. */ #if 0 if (((xio->scsiio.tag_type == CTL_TAG_UNTAGGED) && (io->taskio.tag_type == CTL_TAG_UNTAGGED)) || (xio->scsiio.tag_num == io->taskio.tag_num)) #endif /* * XXX KDM we've got problems with FC, because it * doesn't send down a tag type with aborts. So we * can only really go by the tag number... * This may cause problems with parallel SCSI. * Need to figure that out!! */ if (xio->scsiio.tag_num == io->taskio.tag_num) { xio->io_hdr.flags |= CTL_FLAG_ABORT; found = 1; if ((io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) == 0 && !(lun->flags & CTL_LUN_PRIMARY_SC)) { union ctl_ha_msg msg_info; msg_info.hdr.nexus = io->io_hdr.nexus; msg_info.task.task_action = CTL_TASK_ABORT_TASK; msg_info.task.tag_num = io->taskio.tag_num; msg_info.task.tag_type = io->taskio.tag_type; msg_info.hdr.msg_type = CTL_MSG_MANAGE_TASKS; msg_info.hdr.original_sc = NULL; msg_info.hdr.serializing_sc = NULL; #if 0 printf("Sent Abort to other side\n"); #endif ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg_info, sizeof(msg_info.task), M_NOWAIT); } #if 0 printf("ctl_abort_task: found I/O to abort\n"); #endif } } mtx_unlock(&lun->lun_lock); if (found == 0) { /* * This isn't really an error. It's entirely possible for * the abort and command completion to cross on the wire. * This is more of an informative/diagnostic error. 
*/ #if 0 printf("ctl_abort_task: ABORT sent for nonexistent I/O: " "%u:%u:%u tag %d type %d\n", io->io_hdr.nexus.initid, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.targ_lun, io->taskio.tag_num, io->taskio.tag_type); #endif } io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE; return (0); } static int ctl_query_task(union ctl_io *io, int task_set) { union ctl_io *xio; struct ctl_lun *lun; struct ctl_softc *softc; int found = 0; uint32_t targ_lun; softc = control_softc; targ_lun = io->io_hdr.nexus.targ_mapped_lun; mtx_lock(&softc->ctl_lock); if ((targ_lun >= CTL_MAX_LUNS) || (lun = softc->ctl_luns[targ_lun]) == NULL) { mtx_unlock(&softc->ctl_lock); io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST; return (1); } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); for (xio = (union ctl_io *)TAILQ_FIRST(&lun->ooa_queue); xio != NULL; xio = (union ctl_io *)TAILQ_NEXT(&xio->io_hdr, ooa_links)) { if ((xio->io_hdr.nexus.targ_port != io->io_hdr.nexus.targ_port) || (xio->io_hdr.nexus.initid != io->io_hdr.nexus.initid) || (xio->io_hdr.flags & CTL_FLAG_ABORT)) continue; if (task_set || xio->scsiio.tag_num == io->taskio.tag_num) { found = 1; break; } } mtx_unlock(&lun->lun_lock); if (found) io->taskio.task_status = CTL_TASK_FUNCTION_SUCCEEDED; else io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE; return (0); } static int ctl_query_async_event(union ctl_io *io) { struct ctl_lun *lun; struct ctl_softc *softc; ctl_ua_type ua; uint32_t targ_lun, initidx; softc = control_softc; targ_lun = io->io_hdr.nexus.targ_mapped_lun; mtx_lock(&softc->ctl_lock); if ((targ_lun >= CTL_MAX_LUNS) || (lun = softc->ctl_luns[targ_lun]) == NULL) { mtx_unlock(&softc->ctl_lock); io->taskio.task_status = CTL_TASK_LUN_DOES_NOT_EXIST; return (1); } mtx_lock(&lun->lun_lock); mtx_unlock(&softc->ctl_lock); initidx = ctl_get_initindex(&io->io_hdr.nexus); ua = ctl_build_qae(lun, initidx, io->taskio.task_resp); mtx_unlock(&lun->lun_lock); if (ua != CTL_UA_NONE) io->taskio.task_status = CTL_TASK_FUNCTION_SUCCEEDED; else io->taskio.task_status = CTL_TASK_FUNCTION_COMPLETE; return (0); } static void ctl_run_task(union ctl_io *io) { struct ctl_softc *softc = control_softc; int retval = 1; CTL_DEBUG_PRINT(("ctl_run_task\n")); KASSERT(io->io_hdr.io_type == CTL_IO_TASK, ("ctl_run_task: Unextected io_type %d\n", io->io_hdr.io_type)); io->taskio.task_status = CTL_TASK_FUNCTION_NOT_SUPPORTED; bzero(io->taskio.task_resp, sizeof(io->taskio.task_resp)); switch (io->taskio.task_action) { case CTL_TASK_ABORT_TASK: retval = ctl_abort_task(io); break; case CTL_TASK_ABORT_TASK_SET: case CTL_TASK_CLEAR_TASK_SET: retval = ctl_abort_task_set(io); break; case CTL_TASK_CLEAR_ACA: break; case CTL_TASK_I_T_NEXUS_RESET: retval = ctl_i_t_nexus_reset(io); break; case CTL_TASK_LUN_RESET: retval = ctl_lun_reset(softc, io); break; case CTL_TASK_TARGET_RESET: retval = ctl_target_reset(softc, io, CTL_UA_TARG_RESET); break; case CTL_TASK_BUS_RESET: retval = ctl_bus_reset(softc, io); break; case CTL_TASK_PORT_LOGIN: break; case CTL_TASK_PORT_LOGOUT: break; case CTL_TASK_QUERY_TASK: retval = ctl_query_task(io, 0); break; case CTL_TASK_QUERY_TASK_SET: retval = ctl_query_task(io, 1); break; case CTL_TASK_QUERY_ASYNC_EVENT: retval = ctl_query_async_event(io); break; default: printf("%s: got unknown task management event %d\n", __func__, io->taskio.task_action); break; } if (retval == 0) io->io_hdr.status = CTL_SUCCESS; else io->io_hdr.status = CTL_ERROR; ctl_done(io); } /* * For HA operation. Handle commands that come in from the other * controller. 
*/ static void ctl_handle_isc(union ctl_io *io) { int free_io; struct ctl_lun *lun; struct ctl_softc *softc; uint32_t targ_lun; softc = control_softc; targ_lun = io->io_hdr.nexus.targ_mapped_lun; lun = softc->ctl_luns[targ_lun]; switch (io->io_hdr.msg_type) { case CTL_MSG_SERIALIZE: free_io = ctl_serialize_other_sc_cmd(&io->scsiio); break; case CTL_MSG_R2R: { const struct ctl_cmd_entry *entry; /* * This is only used in SER_ONLY mode. */ free_io = 0; entry = ctl_get_cmd_entry(&io->scsiio, NULL); mtx_lock(&lun->lun_lock); if (ctl_scsiio_lun_check(lun, entry, (struct ctl_scsiio *)io) != 0) { mtx_unlock(&lun->lun_lock); ctl_done(io); break; } io->io_hdr.flags |= CTL_FLAG_IS_WAS_ON_RTR; mtx_unlock(&lun->lun_lock); ctl_enqueue_rtr(io); break; } case CTL_MSG_FINISH_IO: if (softc->ha_mode == CTL_HA_MODE_XFER) { free_io = 0; ctl_done(io); } else { free_io = 1; mtx_lock(&lun->lun_lock); TAILQ_REMOVE(&lun->ooa_queue, &io->io_hdr, ooa_links); ctl_check_blocked(lun); mtx_unlock(&lun->lun_lock); } break; case CTL_MSG_PERS_ACTION: ctl_hndl_per_res_out_on_other_sc( (union ctl_ha_msg *)&io->presio.pr_msg); free_io = 1; break; case CTL_MSG_BAD_JUJU: free_io = 0; ctl_done(io); break; case CTL_MSG_DATAMOVE: /* Only used in XFER mode */ free_io = 0; ctl_datamove_remote(io); break; case CTL_MSG_DATAMOVE_DONE: /* Only used in XFER mode */ free_io = 0; io->scsiio.be_move_done(io); break; case CTL_MSG_FAILOVER: mtx_lock(&lun->lun_lock); ctl_failover_lun(lun); mtx_unlock(&lun->lun_lock); free_io = 1; break; default: free_io = 1; printf("%s: Invalid message type %d\n", __func__, io->io_hdr.msg_type); break; } if (free_io) ctl_free_io(io); } /* * Returns the match type in the case of a match, or CTL_LUN_PAT_NONE if * there is no match. */ static ctl_lun_error_pattern ctl_cmd_pattern_match(struct ctl_scsiio *ctsio, struct ctl_error_desc *desc) { const struct ctl_cmd_entry *entry; ctl_lun_error_pattern filtered_pattern, pattern; pattern = desc->error_pattern; /* * XXX KDM we need more data passed into this function to match a * custom pattern, and we actually need to implement custom pattern * matching. */ if (pattern & CTL_LUN_PAT_CMD) return (CTL_LUN_PAT_CMD); if ((pattern & CTL_LUN_PAT_MASK) == CTL_LUN_PAT_ANY) return (CTL_LUN_PAT_ANY); entry = ctl_get_cmd_entry(ctsio, NULL); filtered_pattern = entry->pattern & pattern; /* * If the user requested specific flags in the pattern (e.g. * CTL_LUN_PAT_RANGE), make sure the command supports all of those * flags. * * If the user did not specify any flags, it doesn't matter whether * or not the command supports the flags. */ if ((filtered_pattern & ~CTL_LUN_PAT_MASK) != (pattern & ~CTL_LUN_PAT_MASK)) return (CTL_LUN_PAT_NONE); /* * If the user asked for a range check, see if the requested LBA * range overlaps with this command's LBA range. */ if (filtered_pattern & CTL_LUN_PAT_RANGE) { uint64_t lba1; uint64_t len1; ctl_action action; int retval; retval = ctl_get_lba_len((union ctl_io *)ctsio, &lba1, &len1); if (retval != 0) return (CTL_LUN_PAT_NONE); action = ctl_extent_check_lba(lba1, len1, desc->lba_range.lba, desc->lba_range.len, FALSE); /* * A "pass" means that the LBA ranges don't overlap, so * this doesn't match the user's range criteria. 
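 *
 * As a worked example (illustrative flag names only): if the user's
 * pattern asks for CTL_LUN_PAT_READ | CTL_LUN_PAT_RANGE but the command
 * entry only advertises CTL_LUN_PAT_READ, then
 *
 *	filtered_pattern = entry->pattern & pattern;   // _RANGE is lost
 *
 * and the test above, (filtered_pattern & ~CTL_LUN_PAT_MASK) !=
 * (pattern & ~CTL_LUN_PAT_MASK), rejects the match with
 * CTL_LUN_PAT_NONE before any LBA comparison is attempted.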
*/ if (action == CTL_ACTION_PASS) return (CTL_LUN_PAT_NONE); } return (filtered_pattern); } static void ctl_inject_error(struct ctl_lun *lun, union ctl_io *io) { struct ctl_error_desc *desc, *desc2; mtx_assert(&lun->lun_lock, MA_OWNED); STAILQ_FOREACH_SAFE(desc, &lun->error_list, links, desc2) { ctl_lun_error_pattern pattern; /* * Check to see whether this particular command matches * the pattern in the descriptor. */ pattern = ctl_cmd_pattern_match(&io->scsiio, desc); if ((pattern & CTL_LUN_PAT_MASK) == CTL_LUN_PAT_NONE) continue; switch (desc->lun_error & CTL_LUN_INJ_TYPE) { case CTL_LUN_INJ_ABORTED: ctl_set_aborted(&io->scsiio); break; case CTL_LUN_INJ_MEDIUM_ERR: ctl_set_medium_error(&io->scsiio, (io->io_hdr.flags & CTL_FLAG_DATA_MASK) != CTL_FLAG_DATA_OUT); break; case CTL_LUN_INJ_UA: /* 29h/00h POWER ON, RESET, OR BUS DEVICE RESET * OCCURRED */ ctl_set_ua(&io->scsiio, 0x29, 0x00); break; case CTL_LUN_INJ_CUSTOM: /* * We're assuming the user knows what he is doing. * Just copy the sense information without doing * checks. */ bcopy(&desc->custom_sense, &io->scsiio.sense_data, MIN(sizeof(desc->custom_sense), sizeof(io->scsiio.sense_data))); io->scsiio.scsi_status = SCSI_STATUS_CHECK_COND; io->scsiio.sense_len = SSD_FULL_SIZE; io->io_hdr.status = CTL_SCSI_ERROR | CTL_AUTOSENSE; break; case CTL_LUN_INJ_NONE: default: /* * If this is an error injection type we don't know * about, clear the continuous flag (if it is set) * so it will get deleted below. */ desc->lun_error &= ~CTL_LUN_INJ_CONTINUOUS; break; } /* * By default, each error injection action is a one-shot */ if (desc->lun_error & CTL_LUN_INJ_CONTINUOUS) continue; STAILQ_REMOVE(&lun->error_list, desc, ctl_error_desc, links); free(desc, M_CTL); } } #ifdef CTL_IO_DELAY static void ctl_datamove_timer_wakeup(void *arg) { union ctl_io *io; io = (union ctl_io *)arg; ctl_datamove(io); } #endif /* CTL_IO_DELAY */ void ctl_datamove(union ctl_io *io) { struct ctl_lun *lun; void (*fe_datamove)(union ctl_io *io); mtx_assert(&control_softc->ctl_lock, MA_NOTOWNED); CTL_DEBUG_PRINT(("ctl_datamove\n")); lun = (struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; #ifdef CTL_TIME_IO if ((time_uptime - io->io_hdr.start_time) > ctl_time_io_secs) { char str[256]; char path_str[64]; struct sbuf sb; ctl_scsi_path_string(io, path_str, sizeof(path_str)); sbuf_new(&sb, str, sizeof(str), SBUF_FIXEDLEN); sbuf_cat(&sb, path_str); switch (io->io_hdr.io_type) { case CTL_IO_SCSI: ctl_scsi_command_string(&io->scsiio, NULL, &sb); sbuf_printf(&sb, "\n"); sbuf_cat(&sb, path_str); sbuf_printf(&sb, "Tag: 0x%04x, type %d\n", io->scsiio.tag_num, io->scsiio.tag_type); break; case CTL_IO_TASK: sbuf_printf(&sb, "Task I/O type: %d, Tag: 0x%04x, " "Tag Type: %d\n", io->taskio.task_action, io->taskio.tag_num, io->taskio.tag_type); break; default: printf("Invalid CTL I/O type %d\n", io->io_hdr.io_type); panic("Invalid CTL I/O type %d\n", io->io_hdr.io_type); break; } sbuf_cat(&sb, path_str); sbuf_printf(&sb, "ctl_datamove: %jd seconds\n", (intmax_t)time_uptime - io->io_hdr.start_time); sbuf_finish(&sb); printf("%s", sbuf_data(&sb)); } #endif /* CTL_TIME_IO */ #ifdef CTL_IO_DELAY if (io->io_hdr.flags & CTL_FLAG_DELAY_DONE) { io->io_hdr.flags &= ~CTL_FLAG_DELAY_DONE; } else { if ((lun != NULL) && (lun->delay_info.datamove_delay > 0)) { callout_init(&io->io_hdr.delay_callout, /*mpsafe*/ 1); io->io_hdr.flags |= CTL_FLAG_DELAY_DONE; callout_reset(&io->io_hdr.delay_callout, lun->delay_info.datamove_delay * hz, ctl_datamove_timer_wakeup, io); if (lun->delay_info.datamove_type == 
CTL_DELAY_TYPE_ONESHOT) lun->delay_info.datamove_delay = 0; return; } } #endif /* * This command has been aborted. Set the port status, so we fail * the data move. */ if (io->io_hdr.flags & CTL_FLAG_ABORT) { printf("ctl_datamove: tag 0x%04x on (%u:%u:%u) aborted\n", io->scsiio.tag_num, io->io_hdr.nexus.initid, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.targ_lun); io->io_hdr.port_status = 31337; /* * Note that the backend, in this case, will get the * callback in its context. In other cases it may get * called in the frontend's interrupt thread context. */ io->scsiio.be_move_done(io); return; } /* Don't confuse frontend with zero length data move. */ if (io->scsiio.kern_data_len == 0) { io->scsiio.be_move_done(io); return; } /* * If we're in XFER mode and this I/O is from the other shelf * controller, we need to send the DMA to the other side to * actually transfer the data to/from the host. In serialize only * mode the transfer happens below CTL and ctl_datamove() is only * called on the machine that originally received the I/O. */ if ((control_softc->ha_mode == CTL_HA_MODE_XFER) && (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) { union ctl_ha_msg msg; uint32_t sg_entries_sent; int do_sg_copy; int i; memset(&msg, 0, sizeof(msg)); msg.hdr.msg_type = CTL_MSG_DATAMOVE; msg.hdr.original_sc = io->io_hdr.original_sc; msg.hdr.serializing_sc = io; msg.hdr.nexus = io->io_hdr.nexus; msg.hdr.status = io->io_hdr.status; msg.dt.flags = io->io_hdr.flags; /* * We convert everything into a S/G list here. We can't * pass by reference, only by value between controllers. * So we can't pass a pointer to the S/G list, only as many * S/G entries as we can fit in here. If it's possible for * us to get more than CTL_HA_MAX_SG_ENTRIES S/G entries, * then we need to break this up into multiple transfers. */ if (io->scsiio.kern_sg_entries == 0) { msg.dt.kern_sg_entries = 1; #if 0 /* * Convert to a physical address if this is a * virtual address. */ if (io->io_hdr.flags & CTL_FLAG_BUS_ADDR) { msg.dt.sg_list[0].addr = io->scsiio.kern_data_ptr; } else { /* * XXX KDM use busdma here! */ msg.dt.sg_list[0].addr = (void *) vtophys(io->scsiio.kern_data_ptr); } #else KASSERT((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0, ("HA does not support BUS_ADDR")); msg.dt.sg_list[0].addr = io->scsiio.kern_data_ptr; #endif msg.dt.sg_list[0].len = io->scsiio.kern_data_len; do_sg_copy = 0; } else { msg.dt.kern_sg_entries = io->scsiio.kern_sg_entries; do_sg_copy = 1; } msg.dt.kern_data_len = io->scsiio.kern_data_len; msg.dt.kern_total_len = io->scsiio.kern_total_len; msg.dt.kern_data_resid = io->scsiio.kern_data_resid; msg.dt.kern_rel_offset = io->scsiio.kern_rel_offset; msg.dt.sg_sequence = 0; /* * Loop until we've sent all of the S/G entries. On the * other end, we'll recompose these S/G entries into one * contiguous list before passing it to the */ for (sg_entries_sent = 0; sg_entries_sent < msg.dt.kern_sg_entries; msg.dt.sg_sequence++) { msg.dt.cur_sg_entries = MIN((sizeof(msg.dt.sg_list)/ sizeof(msg.dt.sg_list[0])), msg.dt.kern_sg_entries - sg_entries_sent); if (do_sg_copy != 0) { struct ctl_sg_entry *sgl; int j; sgl = (struct ctl_sg_entry *) io->scsiio.kern_data_ptr; /* * If this is in cached memory, flush the cache * before we send the DMA request to the other * controller. We want to do this in either * the * read or the write case. The read * case is straightforward. In the write * case, we want to make sure nothing is * in the local cache that could overwrite * the DMAed data. 
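 *
 * Shape-wise, the surrounding loop is plain fixed-size batching; a
 * standalone sketch of the same pattern (hypothetical names):
 *
 *	sent = 0;
 *	while (sent < total) {
 *		n = MIN(MAX_PER_MSG, total - sent);
 *		fill_message(&msg, &entries[sent], n);
 *		msg.last = (sent + n >= total);	// peer recomposes on last
 *		send(&msg);
 *		sent += n;
 *	}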
*/ for (i = sg_entries_sent, j = 0; i < msg.dt.cur_sg_entries; i++, j++) { #if 0 if ((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0) { /* * XXX KDM use busdma. */ msg.dt.sg_list[j].addr =(void *) vtophys(sgl[i].addr); } else { msg.dt.sg_list[j].addr = sgl[i].addr; } #else KASSERT((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0, ("HA does not support BUS_ADDR")); msg.dt.sg_list[j].addr = sgl[i].addr; #endif msg.dt.sg_list[j].len = sgl[i].len; } } sg_entries_sent += msg.dt.cur_sg_entries; if (sg_entries_sent >= msg.dt.kern_sg_entries) msg.dt.sg_last = 1; else msg.dt.sg_last = 0; if (ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg.dt) - sizeof(msg.dt.sg_list) + sizeof(struct ctl_sg_entry)*msg.dt.cur_sg_entries, M_WAITOK) > CTL_HA_STATUS_SUCCESS) { io->io_hdr.port_status = 31341; io->scsiio.be_move_done(io); return; } msg.dt.sent_sg_entries = sg_entries_sent; } /* * Officially hand over the request from us to the peer. * If failover has just happened, then we must return error. * If failover happens just after, then it is not our problem. */ if (lun) mtx_lock(&lun->lun_lock); if (io->io_hdr.flags & CTL_FLAG_FAILOVER) { if (lun) mtx_unlock(&lun->lun_lock); io->io_hdr.port_status = 31342; io->scsiio.be_move_done(io); return; } io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE; io->io_hdr.flags |= CTL_FLAG_DMA_INPROG; if (lun) mtx_unlock(&lun->lun_lock); } else { /* * Lookup the fe_datamove() function for this particular * front end. */ fe_datamove = ctl_io_port(&io->io_hdr)->fe_datamove; fe_datamove(io); } } static void ctl_send_datamove_done(union ctl_io *io, int have_lock) { union ctl_ha_msg msg; memset(&msg, 0, sizeof(msg)); msg.hdr.msg_type = CTL_MSG_DATAMOVE_DONE; msg.hdr.original_sc = io; msg.hdr.serializing_sc = io->io_hdr.serializing_sc; msg.hdr.nexus = io->io_hdr.nexus; msg.hdr.status = io->io_hdr.status; msg.scsi.tag_num = io->scsiio.tag_num; msg.scsi.tag_type = io->scsiio.tag_type; msg.scsi.scsi_status = io->scsiio.scsi_status; memcpy(&msg.scsi.sense_data, &io->scsiio.sense_data, io->scsiio.sense_len); msg.scsi.sense_len = io->scsiio.sense_len; msg.scsi.sense_residual = io->scsiio.sense_residual; msg.scsi.fetd_status = io->io_hdr.port_status; msg.scsi.residual = io->scsiio.residual; io->io_hdr.flags &= ~CTL_FLAG_IO_ACTIVE; if (io->io_hdr.flags & CTL_FLAG_FAILOVER) { ctl_failover_io(io, /*have_lock*/ have_lock); return; } ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg.scsi) - sizeof(msg.scsi.sense_data) + msg.scsi.sense_len, M_WAITOK); } /* * The DMA to the remote side is done, now we need to tell the other side * we're done so it can continue with its data movement. */ static void ctl_datamove_remote_write_cb(struct ctl_ha_dt_req *rq) { union ctl_io *io; int i; io = rq->context; if (rq->ret != CTL_HA_STATUS_SUCCESS) { printf("%s: ISC DMA write failed with error %d", __func__, rq->ret); ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ rq->ret); } ctl_dt_req_free(rq); for (i = 0; i < io->scsiio.kern_sg_entries; i++) free(io->io_hdr.local_sglist[i].addr, M_CTL); free(io->io_hdr.remote_sglist, M_CTL); io->io_hdr.remote_sglist = NULL; io->io_hdr.local_sglist = NULL; /* * The data is in local and remote memory, so now we need to send * status (good or bad) back to the other side. */ ctl_send_datamove_done(io, /*have_lock*/ 0); } /* * We've moved the data from the host/controller into local memory. Now we * need to push it over to the remote controller's memory. 
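 *
 * The key move below is swapping in a custom be_move_done callback, so
 * that when the FETD finishes the local DMA, control returns to the HA
 * code instead of to the backend. A condensed outline of the write
 * path that follows:
 *
 *	io->scsiio.kern_data_ptr = (uint8_t *)io->io_hdr.local_sglist;
 *	io->scsiio.be_move_done = ctl_datamove_remote_dm_write_cb;
 *	fe_datamove(io);		// host to local memory
 *	// ...the callback then starts the local-to-remote HA transfer
 *	// and finally reports status with ctl_send_datamove_done().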
*/ static int ctl_datamove_remote_dm_write_cb(union ctl_io *io) { int retval; retval = ctl_datamove_remote_xfer(io, CTL_HA_DT_CMD_WRITE, ctl_datamove_remote_write_cb); return (retval); } static void ctl_datamove_remote_write(union ctl_io *io) { int retval; void (*fe_datamove)(union ctl_io *io); /* * - Get the data from the host/HBA into local memory. * - DMA memory from the local controller to the remote controller. * - Send status back to the remote controller. */ retval = ctl_datamove_remote_sgl_setup(io); if (retval != 0) return; /* Switch the pointer over so the FETD knows what to do */ io->scsiio.kern_data_ptr = (uint8_t *)io->io_hdr.local_sglist; /* * Use a custom move done callback, since we need to send completion * back to the other controller, not to the backend on this side. */ io->scsiio.be_move_done = ctl_datamove_remote_dm_write_cb; fe_datamove = ctl_io_port(&io->io_hdr)->fe_datamove; fe_datamove(io); } static int ctl_datamove_remote_dm_read_cb(union ctl_io *io) { #if 0 char str[256]; char path_str[64]; struct sbuf sb; #endif int i; for (i = 0; i < io->scsiio.kern_sg_entries; i++) free(io->io_hdr.local_sglist[i].addr, M_CTL); free(io->io_hdr.remote_sglist, M_CTL); io->io_hdr.remote_sglist = NULL; io->io_hdr.local_sglist = NULL; #if 0 scsi_path_string(io, path_str, sizeof(path_str)); sbuf_new(&sb, str, sizeof(str), SBUF_FIXEDLEN); sbuf_cat(&sb, path_str); scsi_command_string(&io->scsiio, NULL, &sb); sbuf_printf(&sb, "\n"); sbuf_cat(&sb, path_str); sbuf_printf(&sb, "Tag: 0x%04x, type %d\n", io->scsiio.tag_num, io->scsiio.tag_type); sbuf_cat(&sb, path_str); sbuf_printf(&sb, "%s: flags %#x, status %#x\n", __func__, io->io_hdr.flags, io->io_hdr.status); sbuf_finish(&sb); printk("%s", sbuf_data(&sb)); #endif /* * The read is done, now we need to send status (good or bad) back * to the other side. */ ctl_send_datamove_done(io, /*have_lock*/ 0); return (0); } static void ctl_datamove_remote_read_cb(struct ctl_ha_dt_req *rq) { union ctl_io *io; void (*fe_datamove)(union ctl_io *io); io = rq->context; if (rq->ret != CTL_HA_STATUS_SUCCESS) { printf("%s: ISC DMA read failed with error %d\n", __func__, rq->ret); ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ rq->ret); } ctl_dt_req_free(rq); /* Switch the pointer over so the FETD knows what to do */ io->scsiio.kern_data_ptr = (uint8_t *)io->io_hdr.local_sglist; /* * Use a custom move done callback, since we need to send completion * back to the other controller, not to the backend on this side. */ io->scsiio.be_move_done = ctl_datamove_remote_dm_read_cb; /* XXX KDM add checks like the ones in ctl_datamove? */ fe_datamove = ctl_io_port(&io->io_hdr)->fe_datamove; fe_datamove(io); } static int ctl_datamove_remote_sgl_setup(union ctl_io *io) { struct ctl_sg_entry *local_sglist; struct ctl_softc *softc; uint32_t len_to_go; int retval; int i; retval = 0; softc = control_softc; local_sglist = io->io_hdr.local_sglist; len_to_go = io->scsiio.kern_data_len; /* * The difficult thing here is that the size of the various * S/G segments may be different than the size from the * remote controller. That'll make it harder when DMAing * the data back to the other side. */ for (i = 0; len_to_go > 0; i++) { local_sglist[i].len = MIN(len_to_go, CTL_HA_DATAMOVE_SEGMENT); local_sglist[i].addr = malloc(local_sglist[i].len, M_CTL, M_WAITOK); len_to_go -= local_sglist[i].len; } /* * Reset the number of S/G entries accordingly. The original * number of S/G entries is available in rem_sg_entries. 
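 *
 * The count produced by the loop above is just a ceiling division; for
 * example, with kern_data_len = 300 KB and CTL_HA_DATAMOVE_SEGMENT =
 * 128 KB (illustrative sizes), the loop allocates segments of 128, 128
 * and 44 KB, i.e.
 *
 *	nseg = howmany(kern_data_len, CTL_HA_DATAMOVE_SEGMENT);	// = 3
 *
 * where howmany() is the usual (x + (y - 1)) / y macro from
 * <sys/param.h>.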
*/ io->scsiio.kern_sg_entries = i; #if 0 printf("%s: kern_sg_entries = %d\n", __func__, io->scsiio.kern_sg_entries); for (i = 0; i < io->scsiio.kern_sg_entries; i++) printf("%s: sg[%d] = %p, %lu\n", __func__, i, local_sglist[i].addr, local_sglist[i].len); #endif return (retval); } static int ctl_datamove_remote_xfer(union ctl_io *io, unsigned command, ctl_ha_dt_cb callback) { struct ctl_ha_dt_req *rq; struct ctl_sg_entry *remote_sglist, *local_sglist; uint32_t local_used, remote_used, total_used; int i, j, isc_ret; rq = ctl_dt_req_alloc(); /* * If we failed to allocate the request, and if the DMA didn't fail * anyway, set busy status. This is just a resource allocation * failure. */ if ((rq == NULL) && ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) ctl_set_busy(&io->scsiio); if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS) { if (rq != NULL) ctl_dt_req_free(rq); /* * The data move failed. We need to return status back * to the other controller. No point in trying to DMA * data to the remote controller. */ ctl_send_datamove_done(io, /*have_lock*/ 0); return (1); } local_sglist = io->io_hdr.local_sglist; remote_sglist = io->io_hdr.remote_sglist; local_used = 0; remote_used = 0; total_used = 0; /* * Pull/push the data over the wire from/to the other controller. * This takes into account the possibility that the local and * remote sglists may not be identical in terms of the size of * the elements and the number of elements. * * One fundamental assumption here is that the length allocated for * both the local and remote sglists is identical. Otherwise, we've * essentially got a coding error of some sort. */ isc_ret = CTL_HA_STATUS_SUCCESS; for (i = 0, j = 0; total_used < io->scsiio.kern_data_len; ) { uint32_t cur_len; uint8_t *tmp_ptr; rq->command = command; rq->context = io; /* * Both pointers should be aligned. But it is possible * that the allocation length is not. They should both * also have enough slack left over at the end, though, * to round up to the next 8 byte boundary. */ cur_len = MIN(local_sglist[i].len - local_used, remote_sglist[j].len - remote_used); rq->size = cur_len; tmp_ptr = (uint8_t *)local_sglist[i].addr; tmp_ptr += local_used; #if 0 /* Use physical addresses when talking to ISC hardware */ if ((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0) { /* XXX KDM use busdma */ rq->local = vtophys(tmp_ptr); } else rq->local = tmp_ptr; #else KASSERT((io->io_hdr.flags & CTL_FLAG_BUS_ADDR) == 0, ("HA does not support BUS_ADDR")); rq->local = tmp_ptr; #endif tmp_ptr = (uint8_t *)remote_sglist[j].addr; tmp_ptr += remote_used; rq->remote = tmp_ptr; rq->callback = NULL; local_used += cur_len; if (local_used >= local_sglist[i].len) { i++; local_used = 0; } remote_used += cur_len; if (remote_used >= remote_sglist[j].len) { j++; remote_used = 0; } total_used += cur_len; if (total_used >= io->scsiio.kern_data_len) rq->callback = callback; #if 0 printf("%s: %s: local %p remote %p size %d\n", __func__, (command == CTL_HA_DT_CMD_WRITE) ? "WRITE" : "READ", rq->local, rq->remote, rq->size); #endif isc_ret = ctl_dt_single(rq); if (isc_ret > CTL_HA_STATUS_SUCCESS) break; } if (isc_ret != CTL_HA_STATUS_WAIT) { rq->ret = isc_ret; callback(rq); } return (0); } static void ctl_datamove_remote_read(union ctl_io *io) { int retval; int i; /* * This will send an error to the other controller in the case of a * failure. 
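 *
 * For reference, ctl_datamove_remote_xfer() above pairs two S/G lists
 * whose segment boundaries need not line up; the heart of that loop is
 * the classic two-cursor merge (standalone sketch, hypothetical names):
 *
 *	i = j = li = rj = moved = 0;
 *	while (moved < total) {
 *		n = MIN(local[i].len - li, remote[j].len - rj);
 *		transfer((uint8_t *)local[i].addr + li,
 *		    (uint8_t *)remote[j].addr + rj, n);
 *		li += n; if (li == local[i].len) { i++; li = 0; }
 *		rj += n; if (rj == remote[j].len) { j++; rj = 0; }
 *		moved += n;
 *	}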
*/ retval = ctl_datamove_remote_sgl_setup(io); if (retval != 0) return; retval = ctl_datamove_remote_xfer(io, CTL_HA_DT_CMD_READ, ctl_datamove_remote_read_cb); if (retval != 0) { /* * Make sure we free memory if there was an error. The * ctl_datamove_remote_xfer() function will send the * datamove done message, or call the callback with an * error if there is a problem. */ for (i = 0; i < io->scsiio.kern_sg_entries; i++) free(io->io_hdr.local_sglist[i].addr, M_CTL); free(io->io_hdr.remote_sglist, M_CTL); io->io_hdr.remote_sglist = NULL; io->io_hdr.local_sglist = NULL; } } /* * Process a datamove request from the other controller. This is used for * XFER mode only, not SER_ONLY mode. For writes, we DMA into local memory * first. Once that is complete, the data gets DMAed into the remote * controller's memory. For reads, we DMA from the remote controller's * memory into our memory first, and then move it out to the FETD. */ static void ctl_datamove_remote(union ctl_io *io) { mtx_assert(&control_softc->ctl_lock, MA_NOTOWNED); if (io->io_hdr.flags & CTL_FLAG_FAILOVER) { ctl_failover_io(io, /*have_lock*/ 0); return; } /* * Note that we look for an aborted I/O here, but don't do some of * the other checks that ctl_datamove() normally does. * We don't need to run the datamove delay code, since that should * have been done if need be on the other controller. */ if (io->io_hdr.flags & CTL_FLAG_ABORT) { printf("%s: tag 0x%04x on (%u:%u:%u) aborted\n", __func__, io->scsiio.tag_num, io->io_hdr.nexus.initid, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.targ_lun); io->io_hdr.port_status = 31338; ctl_send_datamove_done(io, /*have_lock*/ 0); return; } if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT) ctl_datamove_remote_write(io); else if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) ctl_datamove_remote_read(io); else { io->io_hdr.port_status = 31339; ctl_send_datamove_done(io, /*have_lock*/ 0); } } static int ctl_process_done(union ctl_io *io) { struct ctl_lun *lun; struct ctl_softc *softc = control_softc; void (*fe_done)(union ctl_io *io); union ctl_ha_msg msg; uint32_t targ_port = io->io_hdr.nexus.targ_port; CTL_DEBUG_PRINT(("ctl_process_done\n")); if ((io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) == 0) fe_done = softc->ctl_ports[targ_port]->fe_done; else fe_done = NULL; #ifdef CTL_TIME_IO if ((time_uptime - io->io_hdr.start_time) > ctl_time_io_secs) { char str[256]; char path_str[64]; struct sbuf sb; ctl_scsi_path_string(io, path_str, sizeof(path_str)); sbuf_new(&sb, str, sizeof(str), SBUF_FIXEDLEN); sbuf_cat(&sb, path_str); switch (io->io_hdr.io_type) { case CTL_IO_SCSI: ctl_scsi_command_string(&io->scsiio, NULL, &sb); sbuf_printf(&sb, "\n"); sbuf_cat(&sb, path_str); sbuf_printf(&sb, "Tag: 0x%04x, type %d\n", io->scsiio.tag_num, io->scsiio.tag_type); break; case CTL_IO_TASK: sbuf_printf(&sb, "Task I/O type: %d, Tag: 0x%04x, " "Tag Type: %d\n", io->taskio.task_action, io->taskio.tag_num, io->taskio.tag_type); break; default: printf("Invalid CTL I/O type %d\n", io->io_hdr.io_type); panic("Invalid CTL I/O type %d\n", io->io_hdr.io_type); break; } sbuf_cat(&sb, path_str); sbuf_printf(&sb, "ctl_process_done: %jd seconds\n", (intmax_t)time_uptime - io->io_hdr.start_time); sbuf_finish(&sb); printf("%s", sbuf_data(&sb)); } #endif /* CTL_TIME_IO */ switch (io->io_hdr.io_type) { case CTL_IO_SCSI: break; case CTL_IO_TASK: if (ctl_debug & CTL_DEBUG_INFO) ctl_io_error_print(io, NULL); if (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC) ctl_free_io(io); else fe_done(io); return 
(CTL_RETVAL_COMPLETE); default: panic("ctl_process_done: invalid io type %d\n", io->io_hdr.io_type); break; /* NOTREACHED */ } lun = (struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if (lun == NULL) { CTL_DEBUG_PRINT(("NULL LUN for lun %d\n", io->io_hdr.nexus.targ_mapped_lun)); goto bailout; } mtx_lock(&lun->lun_lock); /* * Check to see if we have any errors to inject here. We only * inject errors for commands that don't already have errors set. */ if ((STAILQ_FIRST(&lun->error_list) != NULL) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS) && ((io->io_hdr.flags & CTL_FLAG_STATUS_SENT) == 0)) ctl_inject_error(lun, io); /* * XXX KDM how do we treat commands that aren't completed * successfully? * * XXX KDM should we also track I/O latency? */ if ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS && io->io_hdr.io_type == CTL_IO_SCSI) { #ifdef CTL_TIME_IO struct bintime cur_bt; #endif int type; if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) type = CTL_STATS_READ; else if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_OUT) type = CTL_STATS_WRITE; else type = CTL_STATS_NO_IO; lun->stats.ports[targ_port].bytes[type] += io->scsiio.kern_total_len; lun->stats.ports[targ_port].operations[type]++; #ifdef CTL_TIME_IO bintime_add(&lun->stats.ports[targ_port].dma_time[type], &io->io_hdr.dma_bt); lun->stats.ports[targ_port].num_dmas[type] += io->io_hdr.num_dmas; getbintime(&cur_bt); bintime_sub(&cur_bt, &io->io_hdr.start_bt); bintime_add(&lun->stats.ports[targ_port].time[type], &cur_bt); #endif } /* * Remove this from the OOA queue. */ TAILQ_REMOVE(&lun->ooa_queue, &io->io_hdr, ooa_links); #ifdef CTL_TIME_IO if (TAILQ_EMPTY(&lun->ooa_queue)) lun->last_busy = getsbinuptime(); #endif /* * Run through the blocked queue on this LUN and see if anything * has become unblocked, now that this transaction is done. */ ctl_check_blocked(lun); /* * If the LUN has been invalidated, free it if there is nothing * left on its OOA queue. */ if ((lun->flags & CTL_LUN_INVALID) && TAILQ_EMPTY(&lun->ooa_queue)) { mtx_unlock(&lun->lun_lock); mtx_lock(&softc->ctl_lock); ctl_free_lun(lun); mtx_unlock(&softc->ctl_lock); } else mtx_unlock(&lun->lun_lock); bailout: /* * If this command has been aborted, make sure we set the status * properly. The FETD is responsible for freeing the I/O and doing * whatever it needs to do to clean up its state. */ if (io->io_hdr.flags & CTL_FLAG_ABORT) ctl_set_task_aborted(&io->scsiio); /* * If enabled, print command error status. */ if ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS && (ctl_debug & CTL_DEBUG_INFO) != 0) ctl_io_error_print(io, NULL); /* * Tell the FETD or the other shelf controller we're done with this * command. Note that only SCSI commands get to this point. Task * management commands are completed above. 
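 *
 * Condensed, the two HA cases below are (a sketch of the control flow
 * that follows, not literal code):
 *
 *	if (ha_mode != XFER && SENT_2OTHER_SC)
 *		send bare CTL_MSG_FINISH_IO;	// peer drops its copy
 *	if (ha_mode == XFER && FROM_OTHER_SC) {
 *		send status + sense to peer;	// results go home
 *		ctl_free_io(io);		// local copy is finished
 *	} else
 *		fe_done(io);			// normal local completion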
*/ if ((softc->ha_mode != CTL_HA_MODE_XFER) && (io->io_hdr.flags & CTL_FLAG_SENT_2OTHER_SC)) { memset(&msg, 0, sizeof(msg)); msg.hdr.msg_type = CTL_MSG_FINISH_IO; msg.hdr.serializing_sc = io->io_hdr.serializing_sc; msg.hdr.nexus = io->io_hdr.nexus; ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg.scsi) - sizeof(msg.scsi.sense_data), M_WAITOK); } if ((softc->ha_mode == CTL_HA_MODE_XFER) && (io->io_hdr.flags & CTL_FLAG_FROM_OTHER_SC)) { memset(&msg, 0, sizeof(msg)); msg.hdr.msg_type = CTL_MSG_FINISH_IO; msg.hdr.original_sc = io->io_hdr.original_sc; msg.hdr.nexus = io->io_hdr.nexus; msg.hdr.status = io->io_hdr.status; msg.scsi.scsi_status = io->scsiio.scsi_status; msg.scsi.tag_num = io->scsiio.tag_num; msg.scsi.tag_type = io->scsiio.tag_type; msg.scsi.sense_len = io->scsiio.sense_len; msg.scsi.sense_residual = io->scsiio.sense_residual; msg.scsi.residual = io->scsiio.residual; memcpy(&msg.scsi.sense_data, &io->scsiio.sense_data, io->scsiio.sense_len); ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg.scsi) - sizeof(msg.scsi.sense_data) + msg.scsi.sense_len, M_WAITOK); ctl_free_io(io); } else fe_done(io); return (CTL_RETVAL_COMPLETE); } #ifdef CTL_WITH_CA /* * Front end should call this if it doesn't do autosense. When the request * sense comes back in from the initiator, we'll dequeue this and send it. */ int ctl_queue_sense(union ctl_io *io) { struct ctl_lun *lun; struct ctl_port *port; struct ctl_softc *softc; uint32_t initidx, targ_lun; softc = control_softc; CTL_DEBUG_PRINT(("ctl_queue_sense\n")); /* * LUN lookup will likely move to the ctl_work_thread() once we * have our new queueing infrastructure (that doesn't put things on * a per-LUN queue initially). That is so that we can handle * things like an INQUIRY to a LUN that we don't have enabled. We * can't deal with that right now. */ mtx_lock(&softc->ctl_lock); /* * If we don't have a LUN for this, just toss the sense * information. */ port = ctl_io_port(&io->io_hdr); targ_lun = ctl_lun_map_from_port(port, io->io_hdr.nexus.targ_lun); if ((targ_lun < CTL_MAX_LUNS) && (softc->ctl_luns[targ_lun] != NULL)) lun = softc->ctl_luns[targ_lun]; else goto bailout; initidx = ctl_get_initindex(&io->io_hdr.nexus); mtx_lock(&lun->lun_lock); /* * Already have CA set for this LUN...toss the sense information. */ if (ctl_is_set(lun->have_ca, initidx)) { mtx_unlock(&lun->lun_lock); goto bailout; } memcpy(&lun->pending_sense[initidx], &io->scsiio.sense_data, MIN(sizeof(lun->pending_sense[initidx]), sizeof(io->scsiio.sense_data))); ctl_set_mask(lun->have_ca, initidx); mtx_unlock(&lun->lun_lock); bailout: mtx_unlock(&softc->ctl_lock); ctl_free_io(io); return (CTL_RETVAL_COMPLETE); } #endif /* * Primary command inlet from frontend ports. All SCSI and task I/O * requests must go through this function. */ int ctl_queue(union ctl_io *io) { struct ctl_port *port; CTL_DEBUG_PRINT(("ctl_queue cdb[0]=%02X\n", io->scsiio.cdb[0])); #ifdef CTL_TIME_IO io->io_hdr.start_time = time_uptime; getbintime(&io->io_hdr.start_bt); #endif /* CTL_TIME_IO */ /* Map FE-specific LUN ID into global one.
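* The result is cached in nexus.targ_mapped_lun so that later stages can look up the LUN without repeating the per-port translation.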
*/ port = ctl_io_port(&io->io_hdr); io->io_hdr.nexus.targ_mapped_lun = ctl_lun_map_from_port(port, io->io_hdr.nexus.targ_lun); switch (io->io_hdr.io_type) { case CTL_IO_SCSI: case CTL_IO_TASK: if (ctl_debug & CTL_DEBUG_CDB) ctl_io_print(io); ctl_enqueue_incoming(io); break; default: printf("ctl_queue: unknown I/O type %d\n", io->io_hdr.io_type); return (EINVAL); } return (CTL_RETVAL_COMPLETE); } #ifdef CTL_IO_DELAY static void ctl_done_timer_wakeup(void *arg) { union ctl_io *io; io = (union ctl_io *)arg; ctl_done(io); } #endif /* CTL_IO_DELAY */ void ctl_serseq_done(union ctl_io *io) { struct ctl_lun *lun; lun = (struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if (lun->be_lun == NULL || lun->be_lun->serseq == CTL_LUN_SERSEQ_OFF) return; mtx_lock(&lun->lun_lock); io->io_hdr.flags |= CTL_FLAG_SERSEQ_DONE; ctl_check_blocked(lun); mtx_unlock(&lun->lun_lock); } void ctl_done(union ctl_io *io) { /* * Enable this to catch duplicate completion issues. */ #if 0 if (io->io_hdr.flags & CTL_FLAG_ALREADY_DONE) { printf("%s: type %d msg %d cdb %x iptl: " "%u:%u:%u tag 0x%04x " "flag %#x status %x\n", __func__, io->io_hdr.io_type, io->io_hdr.msg_type, io->scsiio.cdb[0], io->io_hdr.nexus.initid, io->io_hdr.nexus.targ_port, io->io_hdr.nexus.targ_lun, (io->io_hdr.io_type == CTL_IO_TASK) ? io->taskio.tag_num : io->scsiio.tag_num, io->io_hdr.flags, io->io_hdr.status); } else io->io_hdr.flags |= CTL_FLAG_ALREADY_DONE; #endif /* * This is an internal copy of an I/O, and should not go through * the normal done processing logic. */ if (io->io_hdr.flags & CTL_FLAG_INT_COPY) return; #ifdef CTL_IO_DELAY if (io->io_hdr.flags & CTL_FLAG_DELAY_DONE) { struct ctl_lun *lun; lun = (struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; io->io_hdr.flags &= ~CTL_FLAG_DELAY_DONE; } else { struct ctl_lun *lun; lun = (struct ctl_lun *)io->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; if ((lun != NULL) && (lun->delay_info.done_delay > 0)) { callout_init(&io->io_hdr.delay_callout, /*mpsafe*/ 1); io->io_hdr.flags |= CTL_FLAG_DELAY_DONE; callout_reset(&io->io_hdr.delay_callout, lun->delay_info.done_delay * hz, ctl_done_timer_wakeup, io); if (lun->delay_info.done_type == CTL_DELAY_TYPE_ONESHOT) lun->delay_info.done_delay = 0; return; } } #endif /* CTL_IO_DELAY */ ctl_enqueue_done(io); } static void ctl_work_thread(void *arg) { struct ctl_thread *thr = (struct ctl_thread *)arg; struct ctl_softc *softc = thr->ctl_softc; union ctl_io *io; int retval; CTL_DEBUG_PRINT(("ctl_work_thread starting\n")); for (;;) { retval = 0; /* * We handle the queues in this order: * - ISC * - done queue (to free up resources, unblock other commands) * - RtR queue * - incoming queue * * If those queues are empty, we break out of the loop and * go to sleep.
*/ mtx_lock(&thr->queue_lock); io = (union ctl_io *)STAILQ_FIRST(&thr->isc_queue); if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->isc_queue, links); mtx_unlock(&thr->queue_lock); ctl_handle_isc(io); continue; } io = (union ctl_io *)STAILQ_FIRST(&thr->done_queue); if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->done_queue, links); /* clear any blocked commands, call fe_done */ mtx_unlock(&thr->queue_lock); retval = ctl_process_done(io); continue; } io = (union ctl_io *)STAILQ_FIRST(&thr->incoming_queue); if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->incoming_queue, links); mtx_unlock(&thr->queue_lock); if (io->io_hdr.io_type == CTL_IO_TASK) ctl_run_task(io); else ctl_scsiio_precheck(softc, &io->scsiio); continue; } io = (union ctl_io *)STAILQ_FIRST(&thr->rtr_queue); if (io != NULL) { STAILQ_REMOVE_HEAD(&thr->rtr_queue, links); mtx_unlock(&thr->queue_lock); retval = ctl_scsiio(&io->scsiio); if (retval != CTL_RETVAL_COMPLETE) CTL_DEBUG_PRINT(("ctl_scsiio failed\n")); continue; } /* Sleep until we have something to do. */ mtx_sleep(thr, &thr->queue_lock, PDROP | PRIBIO, "-", 0); } } static void ctl_lun_thread(void *arg) { struct ctl_softc *softc = (struct ctl_softc *)arg; struct ctl_be_lun *be_lun; int retval; CTL_DEBUG_PRINT(("ctl_lun_thread starting\n")); for (;;) { retval = 0; mtx_lock(&softc->ctl_lock); be_lun = STAILQ_FIRST(&softc->pending_lun_queue); if (be_lun != NULL) { STAILQ_REMOVE_HEAD(&softc->pending_lun_queue, links); mtx_unlock(&softc->ctl_lock); ctl_create_lun(be_lun); continue; } /* Sleep until we have something to do. */ mtx_sleep(&softc->pending_lun_queue, &softc->ctl_lock, PDROP | PRIBIO, "-", 0); } } static void ctl_thresh_thread(void *arg) { struct ctl_softc *softc = (struct ctl_softc *)arg; struct ctl_lun *lun; struct ctl_be_lun *be_lun; struct scsi_da_rw_recovery_page *rwpage; struct ctl_logical_block_provisioning_page *page; const char *attr; union ctl_ha_msg msg; uint64_t thres, val; int i, e, set; CTL_DEBUG_PRINT(("ctl_thresh_thread starting\n")); for (;;) { mtx_lock(&softc->ctl_lock); STAILQ_FOREACH(lun, &softc->lun_list, links) { be_lun = lun->be_lun; if ((lun->flags & CTL_LUN_DISABLED) || (lun->flags & CTL_LUN_OFFLINE) || lun->backend->lun_attr == NULL) continue; if ((lun->flags & CTL_LUN_PRIMARY_SC) == 0 && softc->ha_mode == CTL_HA_MODE_XFER) continue; rwpage = &lun->mode_pages.rw_er_page[CTL_PAGE_CURRENT]; if ((rwpage->byte8 & SMS_RWER_LBPERE) == 0) continue; e = 0; page = &lun->mode_pages.lbp_page[CTL_PAGE_CURRENT]; for (i = 0; i < CTL_NUM_LBP_THRESH; i++) { if ((page->descr[i].flags & SLBPPD_ENABLED) == 0) continue; thres = scsi_4btoul(page->descr[i].count); thres <<= CTL_LBP_EXPONENT; switch (page->descr[i].resource) { case 0x01: attr = "blocksavail"; break; case 0x02: attr = "blocksused"; break; case 0xf1: attr = "poolblocksavail"; break; case 0xf2: attr = "poolblocksused"; break; default: continue; } mtx_unlock(&softc->ctl_lock); // XXX val = lun->backend->lun_attr( lun->be_lun->be_lun, attr); mtx_lock(&softc->ctl_lock); if (val == UINT64_MAX) continue; if ((page->descr[i].flags & SLBPPD_ARMING_MASK) == SLBPPD_ARMING_INC) e = (val >= thres); else e = (val <= thres); if (e) break; } mtx_lock(&lun->lun_lock); if (e) { scsi_u64to8b((uint8_t *)&page->descr[i] - (uint8_t *)page, lun->ua_tpt_info); if (lun->lasttpt == 0 || time_uptime - lun->lasttpt >= CTL_LBP_UA_PERIOD) { lun->lasttpt = time_uptime; ctl_est_ua_all(lun, -1, CTL_UA_THIN_PROV_THRES); set = 1; } else set = 0; } else { lun->lasttpt = 0; ctl_clr_ua_all(lun, -1, CTL_UA_THIN_PROV_THRES); set = -1; } 
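/* At this point set is 1 if a new threshold UA was established, -1 if the condition cleared, and 0 if nothing changed; only nonzero values are propagated to the peer below. */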
mtx_unlock(&lun->lun_lock); if (set != 0 && lun->ctl_softc->ha_mode == CTL_HA_MODE_XFER) { /* Send msg to other side. */ bzero(&msg.ua, sizeof(msg.ua)); msg.hdr.msg_type = CTL_MSG_UA; msg.hdr.nexus.initid = -1; msg.hdr.nexus.targ_port = -1; msg.hdr.nexus.targ_lun = lun->lun; msg.hdr.nexus.targ_mapped_lun = lun->lun; msg.ua.ua_all = 1; msg.ua.ua_set = (set > 0); msg.ua.ua_type = CTL_UA_THIN_PROV_THRES; memcpy(msg.ua.ua_info, lun->ua_tpt_info, 8); mtx_unlock(&softc->ctl_lock); // XXX ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg, sizeof(msg.ua), M_WAITOK); mtx_lock(&softc->ctl_lock); } } mtx_unlock(&softc->ctl_lock); pause("-", CTL_LBP_PERIOD * hz); } } static void ctl_enqueue_incoming(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_thread *thr; u_int idx; idx = (io->io_hdr.nexus.targ_port * 127 + io->io_hdr.nexus.initid) % worker_threads; thr = &softc->threads[idx]; mtx_lock(&thr->queue_lock); STAILQ_INSERT_TAIL(&thr->incoming_queue, &io->io_hdr, links); mtx_unlock(&thr->queue_lock); wakeup(thr); } static void ctl_enqueue_rtr(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_thread *thr; thr = &softc->threads[io->io_hdr.nexus.targ_mapped_lun % worker_threads]; mtx_lock(&thr->queue_lock); STAILQ_INSERT_TAIL(&thr->rtr_queue, &io->io_hdr, links); mtx_unlock(&thr->queue_lock); wakeup(thr); } static void ctl_enqueue_done(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_thread *thr; thr = &softc->threads[io->io_hdr.nexus.targ_mapped_lun % worker_threads]; mtx_lock(&thr->queue_lock); STAILQ_INSERT_TAIL(&thr->done_queue, &io->io_hdr, links); mtx_unlock(&thr->queue_lock); wakeup(thr); } static void ctl_enqueue_isc(union ctl_io *io) { struct ctl_softc *softc = control_softc; struct ctl_thread *thr; thr = &softc->threads[io->io_hdr.nexus.targ_mapped_lun % worker_threads]; mtx_lock(&thr->queue_lock); STAILQ_INSERT_TAIL(&thr->isc_queue, &io->io_hdr, links); mtx_unlock(&thr->queue_lock); wakeup(thr); } /* * vim: ts=8 */ Index: projects/clang370-import/sys/cam/ctl/ctl_backend_block.c =================================================================== --- projects/clang370-import/sys/cam/ctl/ctl_backend_block.c (revision 288125) +++ projects/clang370-import/sys/cam/ctl/ctl_backend_block.c (revision 288126) @@ -1,2995 +1,2912 @@ /*- * Copyright (c) 2003 Silicon Graphics International Corp. * Copyright (c) 2009-2011 Spectra Logic Corporation * Copyright (c) 2012 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Edward Tomasz Napierala * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $ */ /* * CAM Target Layer driver backend for block devices. * * Author: Ken Merry */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The idea here is that we'll allocate enough S/G space to hold a 1MB * I/O. If we get an I/O larger than that, we'll split it. */ #define CTLBLK_HALF_IO_SIZE (512 * 1024) #define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2) #define CTLBLK_MAX_SEG MAXPHYS #define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1) #define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2) #ifdef CTLBLK_DEBUG #define DPRINTF(fmt, args...) \ printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) do {} while(0) #endif #define PRIV(io) \ ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND]) #define ARGS(io) \ ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]) SDT_PROVIDER_DEFINE(cbb); typedef enum { CTL_BE_BLOCK_LUN_UNCONFIGURED = 0x01, CTL_BE_BLOCK_LUN_CONFIG_ERR = 0x02, CTL_BE_BLOCK_LUN_WAITING = 0x04, } ctl_be_block_lun_flags; typedef enum { CTL_BE_BLOCK_NONE, CTL_BE_BLOCK_DEV, CTL_BE_BLOCK_FILE } ctl_be_block_type; struct ctl_be_block_filedata { struct ucred *cred; }; union ctl_be_block_bedata { struct ctl_be_block_filedata file; }; struct ctl_be_block_io; struct ctl_be_block_lun; typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun, const char *attrname); /* * Backend LUN structure. There is a 1:1 mapping between a block device * and a backend block LUN, and between a backend block LUN and a CTL LUN. */ struct ctl_be_block_lun { struct ctl_lun_create_params params; char lunname[32]; char *dev_path; ctl_be_block_type dev_type; struct vnode *vn; union ctl_be_block_bedata backend; cbb_dispatch_t dispatch; cbb_dispatch_t lun_flush; cbb_dispatch_t unmap; cbb_dispatch_t get_lba_status; cbb_getattr_t getattr; uma_zone_t lun_zone; uint64_t size_blocks; uint64_t size_bytes; struct ctl_be_block_softc *softc; struct devstat *disk_stats; ctl_be_block_lun_flags flags; STAILQ_ENTRY(ctl_be_block_lun) links; struct ctl_be_lun cbe_lun; struct taskqueue *io_taskqueue; struct task io_task; int num_threads; STAILQ_HEAD(, ctl_io_hdr) input_queue; STAILQ_HEAD(, ctl_io_hdr) config_read_queue; STAILQ_HEAD(, ctl_io_hdr) config_write_queue; STAILQ_HEAD(, ctl_io_hdr) datamove_queue; struct mtx_padalign io_lock; struct mtx_padalign queue_lock; }; /* * Overall softc structure for the block backend module. 
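* A single static instance, backend_block_softc, serves the whole module; its mutex guards num_luns and the lun_list.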
*/ struct ctl_be_block_softc { struct mtx lock; int num_luns; STAILQ_HEAD(, ctl_be_block_lun) lun_list; }; static struct ctl_be_block_softc backend_block_softc; /* * Per-I/O information. */ struct ctl_be_block_io { union ctl_io *io; struct ctl_sg_entry sg_segs[CTLBLK_MAX_SEGS]; struct iovec xiovecs[CTLBLK_MAX_SEGS]; int bio_cmd; int num_segs; int num_bios_sent; int num_bios_done; int send_complete; int num_errors; struct bintime ds_t0; devstat_tag_type ds_tag_type; devstat_trans_flags ds_trans_type; uint64_t io_len; uint64_t io_offset; int io_arg; struct ctl_be_block_softc *softc; struct ctl_be_block_lun *lun; void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */ }; extern struct ctl_softc *control_softc; static int cbb_num_threads = 14; SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0, "CAM Target Layer Block Backend"); SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN, &cbb_num_threads, 0, "Number of threads per backing file"); static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc); static void ctl_free_beio(struct ctl_be_block_io *beio); static void ctl_complete_beio(struct ctl_be_block_io *beio); static int ctl_be_block_move_done(union ctl_io *io); static void ctl_be_block_biodone(struct bio *bio); static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname); static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio); static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname); static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io); static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io); static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io); static void ctl_be_block_worker(void *context, int pending); static int ctl_be_block_submit(union ctl_io *io); static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td); static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req); static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req); static int ctl_be_block_close(struct ctl_be_block_lun *be_lun); static int ctl_be_block_open(struct ctl_be_block_softc *softc, struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req); static int ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req); static int ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req); -static int ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun, - struct ctl_lun_req *req); -static int ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun, - struct ctl_lun_req *req); static int ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req); static void ctl_be_block_lun_shutdown(void *be_lun); static void ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status); 
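/* Entry points below are wired into ctl_be_block_driver and invoked by the CTL core. */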
static int ctl_be_block_config_write(union ctl_io *io); static int ctl_be_block_config_read(union ctl_io *io); static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb); static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname); int ctl_be_block_init(void); static struct ctl_backend_driver ctl_be_block_driver = { .name = "block", .flags = CTL_BE_FLAG_HAS_CONFIG, .init = ctl_be_block_init, .data_submit = ctl_be_block_submit, .data_move_done = ctl_be_block_move_done, .config_read = ctl_be_block_config_read, .config_write = ctl_be_block_config_write, .ioctl = ctl_be_block_ioctl, .lun_info = ctl_be_block_lun_info, .lun_attr = ctl_be_block_lun_attr }; MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend"); CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver); static uma_zone_t beio_zone; static struct ctl_be_block_io * ctl_alloc_beio(struct ctl_be_block_softc *softc) { struct ctl_be_block_io *beio; beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO); beio->softc = softc; return (beio); } static void ctl_free_beio(struct ctl_be_block_io *beio) { int duplicate_free; int i; duplicate_free = 0; for (i = 0; i < beio->num_segs; i++) { if (beio->sg_segs[i].addr == NULL) duplicate_free++; uma_zfree(beio->lun->lun_zone, beio->sg_segs[i].addr); beio->sg_segs[i].addr = NULL; /* For compare we had two equal S/G lists. */ if (ARGS(beio->io)->flags & CTL_LLF_COMPARE) { uma_zfree(beio->lun->lun_zone, beio->sg_segs[i + CTLBLK_HALF_SEGS].addr); beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = NULL; } } if (duplicate_free > 0) { printf("%s: %d duplicate frees out of %d segments\n", __func__, duplicate_free, beio->num_segs); } uma_zfree(beio_zone, beio); } static void ctl_complete_beio(struct ctl_be_block_io *beio) { union ctl_io *io = beio->io; if (beio->beio_cont != NULL) { beio->beio_cont(beio); } else { ctl_free_beio(beio); ctl_data_submit_done(io); } } static size_t cmp(uint8_t *a, uint8_t *b, size_t size) { size_t i; for (i = 0; i < size; i++) { if (a[i] != b[i]) break; } return (i); } static void ctl_be_block_compare(union ctl_io *io) { struct ctl_be_block_io *beio; uint64_t off, res; int i; uint8_t info[8]; beio = (struct ctl_be_block_io *)PRIV(io)->ptr; off = 0; for (i = 0; i < beio->num_segs; i++) { res = cmp(beio->sg_segs[i].addr, beio->sg_segs[i + CTLBLK_HALF_SEGS].addr, beio->sg_segs[i].len); off += res; if (res < beio->sg_segs[i].len) break; } if (i < beio->num_segs) { scsi_u64to8b(off, info); ctl_set_sense(&io->scsiio, /*current_error*/ 1, /*sense_key*/ SSD_KEY_MISCOMPARE, /*asc*/ 0x1D, /*ascq*/ 0x00, /*type*/ SSD_ELEM_INFO, /*size*/ sizeof(info), /*data*/ &info, /*type*/ SSD_ELEM_NONE); } else ctl_set_success(&io->scsiio); } static int ctl_be_block_move_done(union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_be_block_lun *be_lun; struct ctl_lba_len_flags *lbalen; #ifdef CTL_TIME_IO struct bintime cur_bt; #endif beio = (struct ctl_be_block_io *)PRIV(io)->ptr; be_lun = beio->lun; DPRINTF("entered\n"); #ifdef CTL_TIME_IO getbintime(&cur_bt); bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt); bintime_add(&io->io_hdr.dma_bt, &cur_bt); io->io_hdr.num_dmas++; #endif io->scsiio.kern_rel_offset += io->scsiio.kern_data_len; /* * We set status at this point for read commands, and write * commands with errors. 
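* Successful writes are not finished here; they are requeued further down so the backend I/O can run from the task queue, where blocking is allowed.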
*/ if (io->io_hdr.flags & CTL_FLAG_ABORT) { ; } else if ((io->io_hdr.port_status == 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) { lbalen = ARGS(beio->io); if (lbalen->flags & CTL_LLF_READ) { ctl_set_success(&io->scsiio); } else if (lbalen->flags & CTL_LLF_COMPARE) { /* We have two data blocks ready for comparison. */ ctl_be_block_compare(io); } } else if ((io->io_hdr.port_status != 0) && ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE || (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) { /* * For hardware error sense keys, the sense key * specific value is defined to be a retry count, * but we use it to pass back an internal FETD * error code. XXX KDM Hopefully the FETD is only * using 16 bits for an error code, since that's * all the space we have in the sks field. */ ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ io->io_hdr.port_status); } /* * If this is a read, or a write with errors, it is done. */ if ((beio->bio_cmd == BIO_READ) || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0) || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) { ctl_complete_beio(beio); return (0); } /* * At this point, we have a write and the DMA completed * successfully. We now have to queue it to the task queue to * execute the backend I/O. That is because we do blocking * memory allocations, and in the file backing case, blocking I/O. * This move done routine is generally called in the SIM's * interrupt context, and therefore we cannot block. */ mtx_lock(&be_lun->queue_lock); /* * XXX KDM make sure that links is okay to use at this point. * Otherwise, we either need to add another field to ctl_io_hdr, * or deal with resource allocation here. */ STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); return (0); } static void ctl_be_block_biodone(struct bio *bio) { struct ctl_be_block_io *beio; struct ctl_be_block_lun *be_lun; union ctl_io *io; int error; beio = bio->bio_caller1; be_lun = beio->lun; io = beio->io; DPRINTF("entered\n"); error = bio->bio_error; mtx_lock(&be_lun->io_lock); if (error != 0) beio->num_errors++; beio->num_bios_done++; /* * XXX KDM will this cause WITNESS to complain? Holding a lock * during the free might cause it to complain. */ g_destroy_bio(bio); /* * If the send complete bit isn't set, or we aren't the last I/O to * complete, then we're done. */ if ((beio->send_complete == 0) || (beio->num_bios_done < beio->num_bios_sent)) { mtx_unlock(&be_lun->io_lock); return; } /* * At this point, we've verified that we are the last I/O to * complete, so it's safe to drop the lock. */ devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); /* * If there are any errors from the backing device, we fail the * entire I/O with a medium error. */ if (beio->num_errors > 0) { if (error == EOPNOTSUPP) { ctl_set_invalid_opcode(&io->scsiio); } else if (error == ENOSPC || error == EDQUOT) { ctl_set_space_alloc_fail(&io->scsiio); } else if (error == EROFS || error == EACCES) { ctl_set_hw_write_protected(&io->scsiio); } else if (beio->bio_cmd == BIO_FLUSH) { /* XXX KDM is there a better error here?
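* (The 0xbad2 retry count distinguishes a failed BIO_FLUSH from the 0xbad1 used by the file-backed flush path.)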
*/ ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ 0xbad2); } else { ctl_set_medium_error(&io->scsiio, beio->bio_cmd == BIO_READ); } ctl_complete_beio(beio); return; } /* * If this is a write, a flush, a delete or verify, we're all done. * If this is a read, we can now send the data to the user. */ if ((beio->bio_cmd == BIO_WRITE) || (beio->bio_cmd == BIO_FLUSH) || (beio->bio_cmd == BIO_DELETE) || (ARGS(io)->flags & CTL_LLF_VERIFY)) { ctl_set_success(&io->scsiio); ctl_complete_beio(beio); } else { if ((ARGS(io)->flags & CTL_LLF_READ) && beio->beio_cont == NULL) { ctl_set_success(&io->scsiio); ctl_serseq_done(io); } #ifdef CTL_TIME_IO getbintime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io = beio->io; struct mount *mountpoint; int error, lock_flags; DPRINTF("entered\n"); binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); mtx_unlock(&be_lun->io_lock); (void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT); if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount))) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; vn_lock(be_lun->vn, lock_flags | LK_RETRY); error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT, curthread); VOP_UNLOCK(be_lun->vn, 0); vn_finished_write(mountpoint); mtx_lock(&be_lun->io_lock); devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); if (error == 0) ctl_set_success(&io->scsiio); else { /* XXX KDM is there a better error here? */ ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ 0xbad1); } ctl_complete_beio(beio); } SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, read, file_done, "uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t"); static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { struct ctl_be_block_filedata *file_data; union ctl_io *io; struct uio xuio; struct iovec *xiovec; size_t s; int error, flags, i; DPRINTF("entered\n"); file_data = &be_lun->backend.file; io = beio->io; flags = 0; if (ARGS(io)->flags & CTL_LLF_DPO) flags |= IO_DIRECT; if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) flags |= IO_SYNC; bzero(&xuio, sizeof(xuio)); if (beio->bio_cmd == BIO_READ) { SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); xuio.uio_rw = UIO_READ; } else { SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); xuio.uio_rw = UIO_WRITE; } xuio.uio_offset = beio->io_offset; xuio.uio_resid = beio->io_len; xuio.uio_segflg = UIO_SYSSPACE; xuio.uio_iov = beio->xiovecs; xuio.uio_iovcnt = beio->num_segs; xuio.uio_td = curthread; for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { xiovec->iov_base = beio->sg_segs[i].addr; xiovec->iov_len = beio->sg_segs[i].len; } binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); mtx_unlock(&be_lun->io_lock); if (beio->bio_cmd == BIO_READ) { vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); /* * UFS pays attention to IO_DIRECT for reads. If the * DIRECTIO option is configured into the kernel, it calls * ffs_rawread().
But that only works for single-segment * uios with user space addresses. In our case, with a * kernel uio, it still reads into the buffer cache, but it * will just try to release the buffer from the cache later * on in ffs_read(). * * ZFS does not pay attention to IO_DIRECT for reads. * * UFS does not pay attention to IO_SYNC for reads. * * ZFS pays attention to IO_SYNC (which translates into the * Solaris define FRSYNC for zfs_read()) for reads. It * attempts to sync the file before reading. */ error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred); VOP_UNLOCK(be_lun->vn, 0); SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0); if (error == 0 && xuio.uio_resid > 0) { /* * If we read less than requested (EOF), then * we should zero the rest of the buffer. */ s = beio->io_len - xuio.uio_resid; for (i = 0; i < beio->num_segs; i++) { if (s >= beio->sg_segs[i].len) { s -= beio->sg_segs[i].len; continue; } bzero((uint8_t *)beio->sg_segs[i].addr + s, beio->sg_segs[i].len - s); s = 0; } } } else { struct mount *mountpoint; int lock_flags; (void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT); if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount))) lock_flags = LK_SHARED; else lock_flags = LK_EXCLUSIVE; vn_lock(be_lun->vn, lock_flags | LK_RETRY); /* * UFS pays attention to IO_DIRECT for writes. The write * is done asynchronously. (Normally the write would just * get put into cache.) * * UFS pays attention to IO_SYNC for writes. It will * attempt to write the buffer out synchronously if that * flag is set. * * ZFS does not pay attention to IO_DIRECT for writes. * * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) * for writes. It will flush the transaction from the * cache before returning. */ error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred); VOP_UNLOCK(be_lun->vn, 0); vn_finished_write(mountpoint); SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); } mtx_lock(&be_lun->io_lock); devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); /* * If we got an error, set the sense data to "MEDIUM ERROR" and * return the I/O to the user. */ if (error != 0) { if (error == ENOSPC || error == EDQUOT) { ctl_set_space_alloc_fail(&io->scsiio); } else if (error == EROFS || error == EACCES) { ctl_set_hw_write_protected(&io->scsiio); } else { ctl_set_medium_error(&io->scsiio, beio->bio_cmd == BIO_READ); } ctl_complete_beio(beio); return; } /* * If this is a write or a verify, we're all done. * If this is a read, we can now send the data to the user.
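* For reads the success status is set before ctl_datamove() so that ctl_serseq_done() can release any commands serialized behind this one while the data is still moving to the FETD.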
*/ if ((beio->bio_cmd == BIO_WRITE) || (ARGS(io)->flags & CTL_LLF_VERIFY)) { ctl_set_success(&io->scsiio); ctl_complete_beio(beio); } else { if ((ARGS(io)->flags & CTL_LLF_READ) && beio->beio_cont == NULL) { ctl_set_success(&io->scsiio); ctl_serseq_done(io); } #ifdef CTL_TIME_IO getbintime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io = beio->io; struct ctl_lba_len_flags *lbalen = ARGS(io); struct scsi_get_lba_status_data *data; off_t roff, off; int error, status; DPRINTF("entered\n"); off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off, 0, curthread->td_ucred, curthread); if (error == 0 && off > roff) status = 0; /* mapped up to off */ else { error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off, 0, curthread->td_ucred, curthread); if (error == 0 && off > roff) status = 1; /* deallocated up to off */ else { status = 0; /* unknown up to the end */ off = be_lun->size_bytes; } } VOP_UNLOCK(be_lun->vn, 0); data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; scsi_u64to8b(lbalen->lba, data->descr[0].addr); scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - lbalen->lba), data->descr[0].length); data->descr[0].status = status; ctl_complete_beio(beio); } static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname) { struct vattr vattr; struct statfs statfs; uint64_t val; int error; val = UINT64_MAX; if (be_lun->vn == NULL) return (val); vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); if (strcmp(attrname, "blocksused") == 0) { error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); if (error == 0) val = vattr.va_bytes / be_lun->cbe_lun.blocksize; } if (strcmp(attrname, "blocksavail") == 0 && (be_lun->vn->v_iflag & VI_DOOMED) == 0) { error = VFS_STATFS(be_lun->vn->v_mount, &statfs); if (error == 0) val = statfs.f_bavail * statfs.f_bsize / be_lun->cbe_lun.blocksize; } VOP_UNLOCK(be_lun->vn, 0); return (val); } static void ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io; struct cdevsw *csw; struct cdev *dev; struct uio xuio; struct iovec *xiovec; int error, flags, i, ref; DPRINTF("entered\n"); io = beio->io; flags = 0; if (ARGS(io)->flags & CTL_LLF_DPO) flags |= IO_DIRECT; if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA) flags |= IO_SYNC; bzero(&xuio, sizeof(xuio)); if (beio->bio_cmd == BIO_READ) { SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0); xuio.uio_rw = UIO_READ; } else { SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0); xuio.uio_rw = UIO_WRITE; } xuio.uio_offset = beio->io_offset; xuio.uio_resid = beio->io_len; xuio.uio_segflg = UIO_SYSSPACE; xuio.uio_iov = beio->xiovecs; xuio.uio_iovcnt = beio->num_segs; xuio.uio_td = curthread; for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) { xiovec->iov_base = beio->sg_segs[i].addr; xiovec->iov_len = beio->sg_segs[i].len; } binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0); mtx_unlock(&be_lun->io_lock); csw = devvn_refthread(be_lun->vn, &dev, &ref); if (csw) { if (beio->bio_cmd == BIO_READ) error = csw->d_read(dev, &xuio, flags); else error = csw->d_write(dev, &xuio, flags); dev_relthread(dev, ref); } else error = ENXIO; if (beio->bio_cmd == BIO_READ) SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 
0, 0, 0); else SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0); mtx_lock(&be_lun->io_lock); devstat_end_transaction(beio->lun->disk_stats, beio->io_len, beio->ds_tag_type, beio->ds_trans_type, /*now*/ NULL, /*then*/&beio->ds_t0); mtx_unlock(&be_lun->io_lock); /* * If we got an error, set the sense data to "MEDIUM ERROR" and * return the I/O to the user. */ if (error != 0) { if (error == ENOSPC || error == EDQUOT) { ctl_set_space_alloc_fail(&io->scsiio); } else if (error == EROFS || error == EACCES) { ctl_set_hw_write_protected(&io->scsiio); } else { ctl_set_medium_error(&io->scsiio, beio->bio_cmd == BIO_READ); } ctl_complete_beio(beio); return; } /* * If this is a write or a verify, we're all done. * If this is a read, we can now send the data to the user. */ if ((beio->bio_cmd == BIO_WRITE) || (ARGS(io)->flags & CTL_LLF_VERIFY)) { ctl_set_success(&io->scsiio); ctl_complete_beio(beio); } else { if ((ARGS(io)->flags & CTL_LLF_READ) && beio->beio_cont == NULL) { ctl_set_success(&io->scsiio); ctl_serseq_done(io); } #ifdef CTL_TIME_IO getbintime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io = beio->io; struct cdevsw *csw; struct cdev *dev; struct ctl_lba_len_flags *lbalen = ARGS(io); struct scsi_get_lba_status_data *data; off_t roff, off; int error, ref, status; DPRINTF("entered\n"); csw = devvn_refthread(be_lun->vn, &dev, &ref); if (csw == NULL) { status = 0; /* unknown up to the end */ off = be_lun->size_bytes; goto done; } off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize; error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD, curthread); if (error == 0 && off > roff) status = 0; /* mapped up to off */ else { error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD, curthread); if (error == 0 && off > roff) status = 1; /* deallocated up to off */ else { status = 0; /* unknown up to the end */ off = be_lun->size_bytes; } } dev_relthread(dev, ref); done: data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr; scsi_u64to8b(lbalen->lba, data->descr[0].addr); scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize - lbalen->lba), data->descr[0].length); data->descr[0].status = status; ctl_complete_beio(beio); } static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { struct bio *bio; union ctl_io *io; struct cdevsw *csw; struct cdev *dev; int ref; io = beio->io; DPRINTF("entered\n"); /* This can't fail, it's a blocking allocation. */ bio = g_alloc_bio(); bio->bio_cmd = BIO_FLUSH; bio->bio_offset = 0; bio->bio_data = 0; bio->bio_done = ctl_be_block_biodone; bio->bio_caller1 = beio; bio->bio_pblkno = 0; /* * We don't need to acquire the LUN lock here, because we are only * sending one bio, and so there is no other context to synchronize * with. 
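* send_complete is set before the bio is issued for the same reason: with a single bio there is no window in which the done routine could see a partial count.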
*/ beio->num_bios_sent = 1; beio->send_complete = 1; binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); mtx_unlock(&be_lun->io_lock); csw = devvn_refthread(be_lun->vn, &dev, &ref); if (csw) { bio->bio_dev = dev; csw->d_strategy(bio); dev_relthread(dev, ref); } else { bio->bio_error = ENXIO; ctl_be_block_biodone(bio); } } static void ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio, uint64_t off, uint64_t len, int last) { struct bio *bio; uint64_t maxlen; struct cdevsw *csw; struct cdev *dev; int ref; csw = devvn_refthread(be_lun->vn, &dev, &ref); maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize); while (len > 0) { bio = g_alloc_bio(); bio->bio_cmd = BIO_DELETE; bio->bio_dev = dev; bio->bio_offset = off; bio->bio_length = MIN(len, maxlen); bio->bio_data = 0; bio->bio_done = ctl_be_block_biodone; bio->bio_caller1 = beio; bio->bio_pblkno = off / be_lun->cbe_lun.blocksize; off += bio->bio_length; len -= bio->bio_length; mtx_lock(&be_lun->io_lock); beio->num_bios_sent++; if (last && len == 0) beio->send_complete = 1; mtx_unlock(&be_lun->io_lock); if (csw) { csw->d_strategy(bio); } else { bio->bio_error = ENXIO; ctl_be_block_biodone(bio); } } if (csw) dev_relthread(dev, ref); } static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { union ctl_io *io; struct ctl_ptr_len_flags *ptrlen; struct scsi_unmap_desc *buf, *end; uint64_t len; io = beio->io; DPRINTF("entered\n"); binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); mtx_unlock(&be_lun->io_lock); if (beio->io_offset == -1) { beio->io_len = 0; ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; buf = (struct scsi_unmap_desc *)ptrlen->ptr; end = buf + ptrlen->len / sizeof(*buf); for (; buf < end; buf++) { len = (uint64_t)scsi_4btoul(buf->length) * be_lun->cbe_lun.blocksize; beio->io_len += len; ctl_be_block_unmap_dev_range(be_lun, beio, scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize, len, (end - buf < 2) ? TRUE : FALSE); } } else ctl_be_block_unmap_dev_range(be_lun, beio, beio->io_offset, beio->io_len, TRUE); } static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun, struct ctl_be_block_io *beio) { TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); struct bio *bio; struct cdevsw *csw; struct cdev *dev; off_t cur_offset; int i, max_iosize, ref; DPRINTF("entered\n"); csw = devvn_refthread(be_lun->vn, &dev, &ref); /* * We have to limit our I/O size to the maximum supported by the * backend device. Hopefully it is MAXPHYS. If the driver doesn't * set it properly, use DFLTPHYS. */ if (csw) { max_iosize = dev->si_iosize_max; if (max_iosize < PAGE_SIZE) max_iosize = DFLTPHYS; } else max_iosize = DFLTPHYS; cur_offset = beio->io_offset; for (i = 0; i < beio->num_segs; i++) { size_t cur_size; uint8_t *cur_ptr; cur_size = beio->sg_segs[i].len; cur_ptr = beio->sg_segs[i].addr; while (cur_size > 0) { /* This can't fail, it's a blocking allocation. 
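* (g_alloc_bio() allocates with M_WAITOK, so the KASSERT below is purely a sanity check.)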
*/ bio = g_alloc_bio(); KASSERT(bio != NULL, ("g_alloc_bio() failed!\n")); bio->bio_cmd = beio->bio_cmd; bio->bio_dev = dev; bio->bio_caller1 = beio; bio->bio_length = min(cur_size, max_iosize); bio->bio_offset = cur_offset; bio->bio_data = cur_ptr; bio->bio_done = ctl_be_block_biodone; bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize; cur_offset += bio->bio_length; cur_ptr += bio->bio_length; cur_size -= bio->bio_length; TAILQ_INSERT_TAIL(&queue, bio, bio_queue); beio->num_bios_sent++; } } binuptime(&beio->ds_t0); mtx_lock(&be_lun->io_lock); devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0); beio->send_complete = 1; mtx_unlock(&be_lun->io_lock); /* * Fire off all allocated requests! */ while ((bio = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, bio, bio_queue); if (csw) csw->d_strategy(bio); else { bio->bio_error = ENXIO; ctl_be_block_biodone(bio); } } if (csw) dev_relthread(dev, ref); } static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname) { struct diocgattr_arg arg; struct cdevsw *csw; struct cdev *dev; int error, ref; csw = devvn_refthread(be_lun->vn, &dev, &ref); if (csw == NULL) return (UINT64_MAX); strlcpy(arg.name, attrname, sizeof(arg.name)); arg.len = sizeof(arg.value.off); if (csw->d_ioctl) { error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, curthread); } else error = ENODEV; dev_relthread(dev, ref); if (error != 0) return (UINT64_MAX); return (arg.value.off); } static void ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_be_block_io *beio; struct ctl_lba_len_flags *lbalen; DPRINTF("entered\n"); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; beio->io_len = lbalen->len * cbe_lun->blocksize; beio->io_offset = lbalen->lba * cbe_lun->blocksize; beio->io_arg = (lbalen->flags & SSC_IMMED) != 0; beio->bio_cmd = BIO_FLUSH; beio->ds_trans_type = DEVSTAT_NO_DATA; DPRINTF("SYNC\n"); be_lun->lun_flush(be_lun, beio); } static void ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio) { union ctl_io *io; io = beio->io; ctl_free_beio(beio); if ((io->io_hdr.flags & CTL_FLAG_ABORT) || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { ctl_config_write_done(io); return; } ctl_be_block_config_write(io); } static void ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_be_block_io *beio; struct ctl_lba_len_flags *lbalen; uint64_t len_left, lba; uint32_t pb, pbo, adj; int i, seglen; uint8_t *buf, *end; DPRINTF("entered\n"); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; lbalen = ARGS(beio->io); if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) || (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) { ctl_free_beio(beio); ctl_set_invalid_field(&io->scsiio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 0, /*bit*/ 0); ctl_config_write_done(io); return; } if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) { beio->io_offset = lbalen->lba * cbe_lun->blocksize; beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize; beio->bio_cmd = BIO_DELETE; beio->ds_trans_type = DEVSTAT_FREE; be_lun->unmap(be_lun, beio); return; } beio->bio_cmd = BIO_WRITE; beio->ds_trans_type = DEVSTAT_WRITE; DPRINTF("WRITE SAME at LBA %jx len %u\n", (uintmax_t)lbalen->lba, lbalen->len); pb = 
cbe_lun->blocksize << be_lun->cbe_lun.pblockexp; if (be_lun->cbe_lun.pblockoff > 0) pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff; else pbo = 0; len_left = (uint64_t)lbalen->len * cbe_lun->blocksize; for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { /* * Setup the S/G entry for this chunk. */ seglen = MIN(CTLBLK_MAX_SEG, len_left); if (pb > cbe_lun->blocksize) { adj = ((lbalen->lba + lba) * cbe_lun->blocksize + seglen - pbo) % pb; if (seglen > adj) seglen -= adj; else seglen -= seglen % cbe_lun->blocksize; } else seglen -= seglen % cbe_lun->blocksize; beio->sg_segs[i].len = seglen; beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); DPRINTF("segment %d addr %p len %zd\n", i, beio->sg_segs[i].addr, beio->sg_segs[i].len); beio->num_segs++; len_left -= seglen; buf = beio->sg_segs[i].addr; end = buf + seglen; for (; buf < end; buf += cbe_lun->blocksize) { memcpy(buf, io->scsiio.kern_data_ptr, cbe_lun->blocksize); if (lbalen->flags & SWS_LBDATA) scsi_ulto4b(lbalen->lba + lba, buf); lba++; } } beio->io_offset = lbalen->lba * cbe_lun->blocksize; beio->io_len = lba * cbe_lun->blocksize; /* We cannot do it all in one run, so correct the counters and schedule a rerun. */ if (len_left > 0) { lbalen->lba += lba; lbalen->len -= lba; beio->beio_cont = ctl_be_block_cw_done_ws; } be_lun->dispatch(be_lun, beio); } static void ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_ptr_len_flags *ptrlen; DPRINTF("entered\n"); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN]; if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) { ctl_free_beio(beio); ctl_set_invalid_field(&io->scsiio, /*sks_valid*/ 0, /*command*/ 1, /*field*/ 0, /*bit_valid*/ 0, /*bit*/ 0); ctl_config_write_done(io); return; } beio->io_len = 0; beio->io_offset = -1; beio->bio_cmd = BIO_DELETE; beio->ds_trans_type = DEVSTAT_FREE; DPRINTF("UNMAP\n"); be_lun->unmap(be_lun, beio); } static void ctl_be_block_cr_done(struct ctl_be_block_io *beio) { union ctl_io *io; io = beio->io; ctl_free_beio(beio); ctl_config_read_done(io); } static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_be_block_softc *softc; DPRINTF("entered\n"); softc = be_lun->softc; beio = ctl_alloc_beio(softc); beio->io = io; beio->lun = be_lun; beio->beio_cont = ctl_be_block_cr_done; PRIV(io)->ptr = (void *)beio; switch (io->scsiio.cdb[0]) { case SERVICE_ACTION_IN: /* GET LBA STATUS */ beio->bio_cmd = -1; beio->ds_trans_type = DEVSTAT_NO_DATA; beio->ds_tag_type = DEVSTAT_TAG_ORDERED; beio->io_len = 0; if (be_lun->get_lba_status) be_lun->get_lba_status(be_lun, beio); else ctl_be_block_cr_done(beio); break; default: panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); break; } } static void ctl_be_block_cw_done(struct ctl_be_block_io *beio) { union ctl_io *io; io = beio->io; ctl_free_beio(beio); ctl_config_write_done(io); } static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_block_io *beio; struct ctl_be_block_softc *softc; DPRINTF("entered\n"); softc = be_lun->softc; beio = ctl_alloc_beio(softc); beio->io = io; beio->lun = be_lun; beio->beio_cont = ctl_be_block_cw_done; switch (io->scsiio.tag_type) { case CTL_TAG_ORDERED: beio->ds_tag_type = DEVSTAT_TAG_ORDERED; break; case CTL_TAG_HEAD_OF_QUEUE: beio->ds_tag_type = DEVSTAT_TAG_HEAD; break; case CTL_TAG_UNTAGGED: case
CTL_TAG_SIMPLE: case CTL_TAG_ACA: default: beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; break; } PRIV(io)->ptr = (void *)beio; switch (io->scsiio.cdb[0]) { case SYNCHRONIZE_CACHE: case SYNCHRONIZE_CACHE_16: ctl_be_block_cw_dispatch_sync(be_lun, io); break; case WRITE_SAME_10: case WRITE_SAME_16: ctl_be_block_cw_dispatch_ws(be_lun, io); break; case UNMAP: ctl_be_block_cw_dispatch_unmap(be_lun, io); break; default: panic("Unhandled CDB type %#x", io->scsiio.cdb[0]); break; } } SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t"); SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t"); static void ctl_be_block_next(struct ctl_be_block_io *beio) { struct ctl_be_block_lun *be_lun; union ctl_io *io; io = beio->io; be_lun = beio->lun; ctl_free_beio(beio); if ((io->io_hdr.flags & CTL_FLAG_ABORT) || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE && (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) { ctl_data_submit_done(io); return; } io->io_hdr.status &= ~CTL_STATUS_MASK; io->io_hdr.status |= CTL_STATUS_NONE; mtx_lock(&be_lun->queue_lock); /* * XXX KDM make sure that links is okay to use at this point. * Otherwise, we either need to add another field to ctl_io_hdr, * or deal with resource allocation here. */ STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); } static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun, union ctl_io *io) { struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_be_block_io *beio; struct ctl_be_block_softc *softc; struct ctl_lba_len_flags *lbalen; struct ctl_ptr_len_flags *bptrlen; uint64_t len_left, lbas; int i; softc = be_lun->softc; DPRINTF("entered\n"); lbalen = ARGS(io); if (lbalen->flags & CTL_LLF_WRITE) { SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0); } else { SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0); } beio = ctl_alloc_beio(softc); beio->io = io; beio->lun = be_lun; bptrlen = PRIV(io); bptrlen->ptr = (void *)beio; switch (io->scsiio.tag_type) { case CTL_TAG_ORDERED: beio->ds_tag_type = DEVSTAT_TAG_ORDERED; break; case CTL_TAG_HEAD_OF_QUEUE: beio->ds_tag_type = DEVSTAT_TAG_HEAD; break; case CTL_TAG_UNTAGGED: case CTL_TAG_SIMPLE: case CTL_TAG_ACA: default: beio->ds_tag_type = DEVSTAT_TAG_SIMPLE; break; } if (lbalen->flags & CTL_LLF_WRITE) { beio->bio_cmd = BIO_WRITE; beio->ds_trans_type = DEVSTAT_WRITE; } else { beio->bio_cmd = BIO_READ; beio->ds_trans_type = DEVSTAT_READ; } DPRINTF("%s at LBA %jx len %u @%ju\n", (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE", (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len); if (lbalen->flags & CTL_LLF_COMPARE) lbas = CTLBLK_HALF_IO_SIZE; else lbas = CTLBLK_MAX_IO_SIZE; lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize); beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize; beio->io_len = lbas * cbe_lun->blocksize; bptrlen->len += lbas; for (i = 0, len_left = beio->io_len; len_left > 0; i++) { KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)", i, CTLBLK_MAX_SEGS)); /* * Setup the S/G entry for this chunk. */ beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left); beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); DPRINTF("segment %d addr %p len %zd\n", i, beio->sg_segs[i].addr, beio->sg_segs[i].len); /* Set up second segment for compare operation. 
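* The second half of sg_segs mirrors the first so that COMPARE can read backend data into one list while the datamove from the initiator fills the other.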
*/ if (lbalen->flags & CTL_LLF_COMPARE) { beio->sg_segs[i + CTLBLK_HALF_SEGS].len = beio->sg_segs[i].len; beio->sg_segs[i + CTLBLK_HALF_SEGS].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK); } beio->num_segs++; len_left -= beio->sg_segs[i].len; } if (bptrlen->len < lbalen->len) beio->beio_cont = ctl_be_block_next; io->scsiio.be_move_done = ctl_be_block_move_done; /* For compare we have separate S/G lists for read and datamove. */ if (lbalen->flags & CTL_LLF_COMPARE) io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS]; else io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs; io->scsiio.kern_data_len = beio->io_len; io->scsiio.kern_data_resid = 0; io->scsiio.kern_sg_entries = beio->num_segs; io->io_hdr.flags |= CTL_FLAG_ALLOCATED; /* * For the read case, we need to read the data into our buffers and * then we can send it back to the user. For the write case, we * need to get the data from the user first. */ if (beio->bio_cmd == BIO_READ) { SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0); be_lun->dispatch(be_lun, beio); } else { SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0); #ifdef CTL_TIME_IO getbintime(&io->io_hdr.dma_start_bt); #endif ctl_datamove(io); } } static void ctl_be_block_worker(void *context, int pending) { struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context; struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; union ctl_io *io; struct ctl_be_block_io *beio; DPRINTF("entered\n"); /* * Fetch and process I/Os from all queues. If we detect LUN * CTL_LUN_FLAG_OFFLINE status here, it is the result of a race, * so make the response maximally opaque to avoid confusing the * initiator. */ for (;;) { mtx_lock(&be_lun->queue_lock); io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue); if (io != NULL) { DPRINTF("datamove queue\n"); STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); beio = (struct ctl_be_block_io *)PRIV(io)->ptr; if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) { ctl_set_busy(&io->scsiio); ctl_complete_beio(beio); return; } be_lun->dispatch(be_lun, beio); continue; } io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue); if (io != NULL) { DPRINTF("config write queue\n"); STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) { ctl_set_busy(&io->scsiio); ctl_config_write_done(io); return; } ctl_be_block_cw_dispatch(be_lun, io); continue; } io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue); if (io != NULL) { DPRINTF("config read queue\n"); STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) { ctl_set_busy(&io->scsiio); ctl_config_read_done(io); return; } ctl_be_block_cr_dispatch(be_lun, io); continue; } io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue); if (io != NULL) { DPRINTF("input queue\n"); STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr, ctl_io_hdr, links); mtx_unlock(&be_lun->queue_lock); if (cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) { ctl_set_busy(&io->scsiio); ctl_data_submit_done(io); return; } ctl_be_block_dispatch(be_lun, io); continue; } /* * If we get here, there is no work left in the queues, so * just break out and let the task queue go to sleep. */ mtx_unlock(&be_lun->queue_lock); break; } } /* * Entry point from CTL to the backend for I/O. We queue everything to a * work thread, so this just puts the I/O on a queue and wakes up the * thread.
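* Nothing here can sleep on memory: even the beio and S/G allocations are deferred to the worker, so this path is safe to call from the frontend's completion context.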
*/ static int ctl_be_block_submit(union ctl_io *io) { struct ctl_be_block_lun *be_lun; struct ctl_be_lun *cbe_lun; DPRINTF("entered\n"); cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ CTL_PRIV_BACKEND_LUN].ptr; be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun; /* * Make sure we only get SCSI I/O. */ KASSERT(io->io_hdr.io_type == CTL_IO_SCSI, ("Non-SCSI I/O (type " "%#x) encountered", io->io_hdr.io_type)); PRIV(io)->len = 0; mtx_lock(&be_lun->queue_lock); /* * XXX KDM make sure that links is okay to use at this point. * Otherwise, we either need to add another field to ctl_io_hdr, * or deal with resource allocation here. */ STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); return (CTL_RETVAL_COMPLETE); } static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct ctl_be_block_softc *softc; int error; softc = &backend_block_softc; error = 0; switch (cmd) { case CTL_LUN_REQ: { struct ctl_lun_req *lun_req; lun_req = (struct ctl_lun_req *)addr; switch (lun_req->reqtype) { case CTL_LUNREQ_CREATE: error = ctl_be_block_create(softc, lun_req); break; case CTL_LUNREQ_RM: error = ctl_be_block_rm(softc, lun_req); break; case CTL_LUNREQ_MODIFY: error = ctl_be_block_modify(softc, lun_req); break; default: lun_req->status = CTL_LUN_ERROR; snprintf(lun_req->error_str, sizeof(lun_req->error_str), "invalid LUN request type %d", lun_req->reqtype); break; } break; } default: error = ENOTTY; break; } return (error); } static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) { struct ctl_be_lun *cbe_lun; struct ctl_be_block_filedata *file_data; struct ctl_lun_create_params *params; char *value; struct vattr vattr; off_t ps, pss, po, pos, us, uss, uo, uos; int error; error = 0; cbe_lun = &be_lun->cbe_lun; file_data = &be_lun->backend.file; params = &be_lun->params; be_lun->dev_type = CTL_BE_BLOCK_FILE; be_lun->dispatch = ctl_be_block_dispatch_file; be_lun->lun_flush = ctl_be_block_flush_file; be_lun->get_lba_status = ctl_be_block_gls_file; be_lun->getattr = ctl_be_block_getattr_file; be_lun->unmap = NULL; cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); if (error != 0) { snprintf(req->error_str, sizeof(req->error_str), "error calling VOP_GETATTR() for file %s", be_lun->dev_path); return (error); } /* * Verify that we have the ability to upgrade to exclusive * access on this file so we can trap errors at open instead * of reporting them during first access. */ if (VOP_ISLOCKED(be_lun->vn) != LK_EXCLUSIVE) { vn_lock(be_lun->vn, LK_UPGRADE | LK_RETRY); if (be_lun->vn->v_iflag & VI_DOOMED) { error = EBADF; snprintf(req->error_str, sizeof(req->error_str), "error locking file %s", be_lun->dev_path); return (error); } } file_data->cred = crhold(curthread->td_ucred); if (params->lun_size_bytes != 0) be_lun->size_bytes = params->lun_size_bytes; else be_lun->size_bytes = vattr.va_size; /* * For files we can use any logical block size. Prefer 512 bytes * for compatibility reasons. If the file's vattr.va_blocksize * (preferred I/O block size) is larger than and a multiple of the * chosen logical block size, report it as the physical block size. */ if (params->blocksize_bytes != 0) cbe_lun->blocksize = params->blocksize_bytes; else cbe_lun->blocksize = 512; be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
0 : (be_lun->size_blocks - 1); us = ps = vattr.va_blocksize; uo = po = 0; value = ctl_get_opt(&cbe_lun->options, "pblocksize"); if (value != NULL) ctl_expand_number(value, &ps); value = ctl_get_opt(&cbe_lun->options, "pblockoffset"); if (value != NULL) ctl_expand_number(value, &po); pss = ps / cbe_lun->blocksize; pos = po / cbe_lun->blocksize; if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { cbe_lun->pblockexp = fls(pss) - 1; cbe_lun->pblockoff = (pss - pos) % pss; } value = ctl_get_opt(&cbe_lun->options, "ublocksize"); if (value != NULL) ctl_expand_number(value, &us); value = ctl_get_opt(&cbe_lun->options, "ublockoffset"); if (value != NULL) ctl_expand_number(value, &uo); uss = us / cbe_lun->blocksize; uos = uo / cbe_lun->blocksize; if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { cbe_lun->ublockexp = fls(uss) - 1; cbe_lun->ublockoff = (uss - uos) % uss; } /* * Sanity check. The media size has to be at least one * sector long. */ if (be_lun->size_bytes < cbe_lun->blocksize) { error = EINVAL; snprintf(req->error_str, sizeof(req->error_str), "file %s size %ju < block size %u", be_lun->dev_path, (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize); } cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize; return (error); } static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) { struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct ctl_lun_create_params *params; struct cdevsw *csw; struct cdev *dev; char *value; int error, atomic, maxio, ref, unmap, tmp; off_t ps, pss, po, pos, us, uss, uo, uos, otmp; params = &be_lun->params; be_lun->dev_type = CTL_BE_BLOCK_DEV; csw = devvn_refthread(be_lun->vn, &dev, &ref); if (csw == NULL) return (ENXIO); if (strcmp(csw->d_name, "zvol") == 0) { be_lun->dispatch = ctl_be_block_dispatch_zvol; be_lun->get_lba_status = ctl_be_block_gls_zvol; atomic = maxio = CTLBLK_MAX_IO_SIZE; } else { be_lun->dispatch = ctl_be_block_dispatch_dev; be_lun->get_lba_status = NULL; atomic = 0; maxio = dev->si_iosize_max; if (maxio <= 0) maxio = DFLTPHYS; if (maxio > CTLBLK_MAX_IO_SIZE) maxio = CTLBLK_MAX_IO_SIZE; } be_lun->lun_flush = ctl_be_block_flush_dev; be_lun->getattr = ctl_be_block_getattr_dev; be_lun->unmap = ctl_be_block_unmap_dev; if (!csw->d_ioctl) { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "no d_ioctl for device %s!", be_lun->dev_path); return (ENODEV); } error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD, curthread); if (error) { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "error %d returned for DIOCGSECTORSIZE ioctl " "on %s!", error, be_lun->dev_path); return (error); } /* * If the user has asked for a blocksize that is greater than the * backing device's blocksize, we can do it only if the blocksize * the user is asking for is an even multiple of the underlying * device's blocksize. 
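 *
 * Illustrative values: exporting 4096-byte logical blocks over a
 * 512-byte-sector device is accepted (4096 % 512 == 0), a 6144-byte
 * request over 4096-byte sectors is rejected (6144 % 4096 != 0), and
 * any request smaller than the device's own sector size is rejected
 * outright.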
*/ if ((params->blocksize_bytes != 0) && (params->blocksize_bytes >= tmp)) { if (params->blocksize_bytes % tmp == 0) { cbe_lun->blocksize = params->blocksize_bytes; } else { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "requested blocksize %u is not an even " "multiple of backing device blocksize %u", params->blocksize_bytes, tmp); return (EINVAL); } } else if (params->blocksize_bytes != 0) { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "requested blocksize %u < backing device " "blocksize %u", params->blocksize_bytes, tmp); return (EINVAL); } else cbe_lun->blocksize = tmp; error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD, curthread); if (error) { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "error %d returned for DIOCGMEDIASIZE " " ioctl on %s!", error, be_lun->dev_path); return (error); } if (params->lun_size_bytes != 0) { if (params->lun_size_bytes > otmp) { dev_relthread(dev, ref); snprintf(req->error_str, sizeof(req->error_str), "requested LUN size %ju > backing device " "size %ju", (uintmax_t)params->lun_size_bytes, (uintmax_t)otmp); return (EINVAL); } be_lun->size_bytes = params->lun_size_bytes; } else be_lun->size_bytes = otmp; be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 0 : (be_lun->size_blocks - 1); error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD, curthread); if (error) ps = po = 0; else { error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po, FREAD, curthread); if (error) po = 0; } us = ps; uo = po; value = ctl_get_opt(&cbe_lun->options, "pblocksize"); if (value != NULL) ctl_expand_number(value, &ps); value = ctl_get_opt(&cbe_lun->options, "pblockoffset"); if (value != NULL) ctl_expand_number(value, &po); pss = ps / cbe_lun->blocksize; pos = po / cbe_lun->blocksize; if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) && ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) { cbe_lun->pblockexp = fls(pss) - 1; cbe_lun->pblockoff = (pss - pos) % pss; } value = ctl_get_opt(&cbe_lun->options, "ublocksize"); if (value != NULL) ctl_expand_number(value, &us); value = ctl_get_opt(&cbe_lun->options, "ublockoffset"); if (value != NULL) ctl_expand_number(value, &uo); uss = us / cbe_lun->blocksize; uos = uo / cbe_lun->blocksize; if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) && ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) { cbe_lun->ublockexp = fls(uss) - 1; cbe_lun->ublockoff = (uss - uos) % uss; } cbe_lun->atomicblock = atomic / cbe_lun->blocksize; cbe_lun->opttxferlen = maxio / cbe_lun->blocksize; if (be_lun->dispatch == ctl_be_block_dispatch_zvol) { unmap = 1; } else { struct diocgattr_arg arg; strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); arg.len = sizeof(arg.value.i); error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD, curthread); unmap = (error == 0) ? 
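	/*
	 * The probe result is only a default: the "unmap" LUN option
	 * checked just below overrides it either way.  For example, a
	 * hypothetical invocation along the lines of
	 *
	 *	ctladm create -b block -o file=/dev/da0 -o unmap=on
	 *
	 * would force UNMAP support on even where GEOM::candelete
	 * reported 0.
	 */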
arg.value.i : 0; } value = ctl_get_opt(&cbe_lun->options, "unmap"); if (value != NULL) unmap = (strcmp(value, "on") == 0); if (unmap) cbe_lun->flags |= CTL_LUN_FLAG_UNMAP; else cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP; dev_relthread(dev, ref); return (0); } static int ctl_be_block_close(struct ctl_be_block_lun *be_lun) { struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; int flags; if (be_lun->vn) { flags = FREAD; if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0) flags |= FWRITE; (void)vn_close(be_lun->vn, flags, NOCRED, curthread); be_lun->vn = NULL; switch (be_lun->dev_type) { case CTL_BE_BLOCK_DEV: break; case CTL_BE_BLOCK_FILE: if (be_lun->backend.file.cred != NULL) { crfree(be_lun->backend.file.cred); be_lun->backend.file.cred = NULL; } break; case CTL_BE_BLOCK_NONE: break; default: panic("Unexpected backend type."); break; } be_lun->dev_type = CTL_BE_BLOCK_NONE; } return (0); } static int ctl_be_block_open(struct ctl_be_block_softc *softc, struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req) { struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; struct nameidata nd; char *value; int error, flags; error = 0; if (rootvnode == NULL) { snprintf(req->error_str, sizeof(req->error_str), "Root filesystem is not mounted"); return (1); } pwd_ensure_dirs(); value = ctl_get_opt(&cbe_lun->options, "file"); if (value == NULL) { snprintf(req->error_str, sizeof(req->error_str), "no file argument specified"); return (1); } free(be_lun->dev_path, M_CTLBLK); be_lun->dev_path = strdup(value, M_CTLBLK); flags = FREAD; value = ctl_get_opt(&cbe_lun->options, "readonly"); if (value == NULL || strcmp(value, "on") != 0) flags |= FWRITE; again: NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread); error = vn_open(&nd, &flags, 0, NULL); if ((error == EROFS || error == EACCES) && (flags & FWRITE)) { flags &= ~FWRITE; goto again; } if (error) { /* * This is the only reasonable guess we can make as far as * path if the user doesn't give us a fully qualified path. * If they want to specify a file, they need to specify the * full path. */ if (be_lun->dev_path[0] != '/') { char *dev_name; asprintf(&dev_name, M_CTLBLK, "/dev/%s", be_lun->dev_path); free(be_lun->dev_path, M_CTLBLK); be_lun->dev_path = dev_name; goto again; } snprintf(req->error_str, sizeof(req->error_str), "error opening %s: %d", be_lun->dev_path, error); return (error); } if (flags & FWRITE) cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY; else cbe_lun->flags |= CTL_LUN_FLAG_READONLY; NDFREE(&nd, NDF_ONLY_PNBUF); be_lun->vn = nd.ni_vp; /* We only support disks and files. 
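 *
 * In outline, the dispatch below is:
 *
 *	vn_isdisk(vp)		-> ctl_be_block_open_dev()
 *	vp->v_type == VREG	-> ctl_be_block_open_file()
 *	anything else		-> EINVAL
 *
 * so disk devices and regular files are accepted, and directories,
 * FIFOs, sockets and the like are rejected up front.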
*/ if (vn_isdisk(be_lun->vn, &error)) { error = ctl_be_block_open_dev(be_lun, req); } else if (be_lun->vn->v_type == VREG) { error = ctl_be_block_open_file(be_lun, req); } else { error = EINVAL; snprintf(req->error_str, sizeof(req->error_str), "%s is not a disk or plain file", be_lun->dev_path); } VOP_UNLOCK(be_lun->vn, 0); if (error != 0) ctl_be_block_close(be_lun); cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; if (be_lun->dispatch != ctl_be_block_dispatch_dev) cbe_lun->serseq = CTL_LUN_SERSEQ_READ; value = ctl_get_opt(&cbe_lun->options, "serseq"); if (value != NULL && strcmp(value, "on") == 0) cbe_lun->serseq = CTL_LUN_SERSEQ_ON; else if (value != NULL && strcmp(value, "read") == 0) cbe_lun->serseq = CTL_LUN_SERSEQ_READ; else if (value != NULL && strcmp(value, "off") == 0) cbe_lun->serseq = CTL_LUN_SERSEQ_OFF; return (0); } static int ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) { struct ctl_be_lun *cbe_lun; struct ctl_be_block_lun *be_lun; struct ctl_lun_create_params *params; char num_thread_str[16]; char tmpstr[32]; char *value; int retval, num_threads; int tmp_num_threads; params = &req->reqdata.create; retval = 0; req->status = CTL_LUN_OK; be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK); cbe_lun = &be_lun->cbe_lun; cbe_lun->be_lun = be_lun; be_lun->params = req->reqdata.create; be_lun->softc = softc; STAILQ_INIT(&be_lun->input_queue); STAILQ_INIT(&be_lun->config_read_queue); STAILQ_INIT(&be_lun->config_write_queue); STAILQ_INIT(&be_lun->datamove_queue); sprintf(be_lun->lunname, "cblk%d", softc->num_luns); mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF); mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF); ctl_init_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args); be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG, NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); if (be_lun->lun_zone == NULL) { snprintf(req->error_str, sizeof(req->error_str), "error allocating UMA zone"); goto bailout_error; } if (params->flags & CTL_LUN_FLAG_DEV_TYPE) cbe_lun->lun_type = params->device_type; else cbe_lun->lun_type = T_DIRECT; be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED; cbe_lun->flags = 0; value = ctl_get_opt(&cbe_lun->options, "ha_role"); if (value != NULL) { if (strcmp(value, "primary") == 0) cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; if (cbe_lun->lun_type == T_DIRECT) { be_lun->size_bytes = params->lun_size_bytes; if (params->blocksize_bytes != 0) cbe_lun->blocksize = params->blocksize_bytes; else cbe_lun->blocksize = 512; be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 0 : (be_lun->size_blocks - 1); if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { retval = ctl_be_block_open(softc, be_lun, req); if (retval != 0) { retval = 0; req->status = CTL_LUN_WARNING; } } num_threads = cbb_num_threads; } else { num_threads = 1; } /* * XXX This searching loop might be refactored to be combined with * the loop above, */ value = ctl_get_opt(&cbe_lun->options, "num_threads"); if (value != NULL) { tmp_num_threads = strtol(value, NULL, 0); /* * We don't let the user specify less than one * thread, but hope he's clueful enough not to * specify 1000 threads. 
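 *
 * Illustrative parses: because strtol() is called with base 0,
 * num_threads=8, num_threads=0x8 and num_threads=010 all work (the
 * latter two also meaning eight); a non-numeric string parses as 0
 * and is caught by the < 1 check below.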
*/ if (tmp_num_threads < 1) { snprintf(req->error_str, sizeof(req->error_str), "invalid number of threads %s", value); goto bailout_error; } num_threads = tmp_num_threads; } if (be_lun->vn == NULL) cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE; /* Tell the user the blocksize we ended up using */ params->lun_size_bytes = be_lun->size_bytes; params->blocksize_bytes = cbe_lun->blocksize; if (params->flags & CTL_LUN_FLAG_ID_REQ) { cbe_lun->req_lun_id = params->req_lun_id; cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ; } else cbe_lun->req_lun_id = 0; cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown; cbe_lun->lun_config_status = ctl_be_block_lun_config_status; cbe_lun->be = &ctl_be_block_driver; if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) { snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d", softc->num_luns); strncpy((char *)cbe_lun->serial_num, tmpstr, MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr))); /* Tell the user what we used for a serial number */ strncpy((char *)params->serial_num, tmpstr, MIN(sizeof(params->serial_num), sizeof(tmpstr))); } else { strncpy((char *)cbe_lun->serial_num, params->serial_num, MIN(sizeof(cbe_lun->serial_num), sizeof(params->serial_num))); } if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) { snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns); strncpy((char *)cbe_lun->device_id, tmpstr, MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr))); /* Tell the user what we used for a device ID */ strncpy((char *)params->device_id, tmpstr, MIN(sizeof(params->device_id), sizeof(tmpstr))); } else { strncpy((char *)cbe_lun->device_id, params->device_id, MIN(sizeof(cbe_lun->device_id), sizeof(params->device_id))); } TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun); be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK, taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue); if (be_lun->io_taskqueue == NULL) { snprintf(req->error_str, sizeof(req->error_str), "unable to create taskqueue"); goto bailout_error; } /* * Note that we start the same number of threads by default for * both the file case and the block device case. For the file * case, we need multiple threads to allow concurrency, because the * vnode interface is designed to be a blocking interface. For the * block device case, ZFS zvols at least will block the caller's * context in many instances, and so we need multiple threads to * overcome that problem. Other block devices don't need as many * threads, but they shouldn't cause too many problems. * * If the user wants to just have a single thread for a block * device, he can specify that when the LUN is created, or change * the tunable/sysctl to alter the default number of threads. */ retval = taskqueue_start_threads(&be_lun->io_taskqueue, /*num threads*/num_threads, /*priority*/PWAIT, /*thread name*/ "%s taskq", be_lun->lunname); if (retval != 0) goto bailout_error; be_lun->num_threads = num_threads; mtx_lock(&softc->lock); softc->num_luns++; STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links); mtx_unlock(&softc->lock); retval = ctl_add_lun(&be_lun->cbe_lun); if (retval != 0) { mtx_lock(&softc->lock); STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links); softc->num_luns--; mtx_unlock(&softc->lock); snprintf(req->error_str, sizeof(req->error_str), "ctl_add_lun() returned error %d, see dmesg for " "details", retval); retval = 0; goto bailout_error; } mtx_lock(&softc->lock); /* * Tell the config_status routine that we're waiting so it won't * clean up the LUN in the event of an error.
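 *
 * The wait itself is a conventional flag-plus-msleep() handshake;
 * in sketch form (ignoring the EINTR handling in the real code):
 *
 *	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
 *	while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED)
 *		msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
 *	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
 *
 * ctl_be_block_lun_config_status() clears UNCONFIGURED and calls
 * wakeup(be_lun) while holding the same lock, so the wakeup cannot
 * slip in between the flag test and the sleep.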
*/ be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) { retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); if (retval == EINTR) break; } be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) { snprintf(req->error_str, sizeof(req->error_str), "LUN configuration error, see dmesg for details"); STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links); softc->num_luns--; mtx_unlock(&softc->lock); goto bailout_error; } else { params->req_lun_id = cbe_lun->lun_id; } mtx_unlock(&softc->lock); be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id, cbe_lun->blocksize, DEVSTAT_ALL_SUPPORTED, cbe_lun->lun_type | DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_OTHER); return (retval); bailout_error: req->status = CTL_LUN_ERROR; if (be_lun->io_taskqueue != NULL) taskqueue_free(be_lun->io_taskqueue); ctl_be_block_close(be_lun); if (be_lun->dev_path != NULL) free(be_lun->dev_path, M_CTLBLK); if (be_lun->lun_zone != NULL) uma_zdestroy(be_lun->lun_zone); ctl_free_opts(&cbe_lun->options); mtx_destroy(&be_lun->queue_lock); mtx_destroy(&be_lun->io_lock); free(be_lun, M_CTLBLK); return (retval); } static int ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) { struct ctl_lun_rm_params *params; struct ctl_be_block_lun *be_lun; struct ctl_be_lun *cbe_lun; int retval; params = &req->reqdata.rm; mtx_lock(&softc->lock); STAILQ_FOREACH(be_lun, &softc->lun_list, links) { if (be_lun->cbe_lun.lun_id == params->lun_id) break; } mtx_unlock(&softc->lock); if (be_lun == NULL) { snprintf(req->error_str, sizeof(req->error_str), "LUN %u is not managed by the block backend", params->lun_id); goto bailout_error; } cbe_lun = &be_lun->cbe_lun; retval = ctl_disable_lun(cbe_lun); if (retval != 0) { snprintf(req->error_str, sizeof(req->error_str), "error %d returned from ctl_disable_lun() for " "LUN %d", retval, params->lun_id); goto bailout_error; } if (be_lun->vn != NULL) { cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE; ctl_lun_offline(cbe_lun); taskqueue_drain_all(be_lun->io_taskqueue); ctl_be_block_close(be_lun); } retval = ctl_invalidate_lun(cbe_lun); if (retval != 0) { snprintf(req->error_str, sizeof(req->error_str), "error %d returned from ctl_invalidate_lun() for " "LUN %d", retval, params->lun_id); goto bailout_error; } mtx_lock(&softc->lock); be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING; while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0); if (retval == EINTR) break; } be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING; if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) { snprintf(req->error_str, sizeof(req->error_str), "interrupted waiting for LUN to be freed"); mtx_unlock(&softc->lock); goto bailout_error; } STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links); softc->num_luns--; mtx_unlock(&softc->lock); taskqueue_drain_all(be_lun->io_taskqueue); taskqueue_free(be_lun->io_taskqueue); if (be_lun->disk_stats != NULL) devstat_remove_entry(be_lun->disk_stats); uma_zdestroy(be_lun->lun_zone); ctl_free_opts(&cbe_lun->options); free(be_lun->dev_path, M_CTLBLK); mtx_destroy(&be_lun->queue_lock); mtx_destroy(&be_lun->io_lock); free(be_lun, M_CTLBLK); req->status = CTL_LUN_OK; return (0); bailout_error: req->status = CTL_LUN_ERROR; return (0); } static int -ctl_be_block_modify_file(struct ctl_be_block_lun *be_lun, - struct ctl_lun_req *req) -{ - struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; - struct vattr vattr; - int error; - 
struct ctl_lun_create_params *params = &be_lun->params; - - if (params->lun_size_bytes != 0) { - be_lun->size_bytes = params->lun_size_bytes; - } else { - vn_lock(be_lun->vn, LK_SHARED | LK_RETRY); - error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred); - VOP_UNLOCK(be_lun->vn, 0); - if (error != 0) { - snprintf(req->error_str, sizeof(req->error_str), - "error calling VOP_GETATTR() for file %s", - be_lun->dev_path); - return (error); - } - be_lun->size_bytes = vattr.va_size; - } - be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; - cbe_lun->maxlba = (be_lun->size_blocks == 0) ? - 0 : (be_lun->size_blocks - 1); - return (0); -} - -static int -ctl_be_block_modify_dev(struct ctl_be_block_lun *be_lun, - struct ctl_lun_req *req) -{ - struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun; - struct ctl_lun_create_params *params = &be_lun->params; - struct cdevsw *csw; - struct cdev *dev; - uint64_t size_bytes; - int error, ref; - - csw = devvn_refthread(be_lun->vn, &dev, &ref); - if (csw == NULL) - return (ENXIO); - if (csw->d_ioctl == NULL) { - dev_relthread(dev, ref); - snprintf(req->error_str, sizeof(req->error_str), - "no d_ioctl for device %s!", be_lun->dev_path); - return (ENODEV); - } - - error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&size_bytes, FREAD, - curthread); - dev_relthread(dev, ref); - if (error) { - snprintf(req->error_str, sizeof(req->error_str), - "error %d returned for DIOCGMEDIASIZE ioctl " - "on %s!", error, be_lun->dev_path); - return (error); - } - - if (params->lun_size_bytes != 0) { - if (params->lun_size_bytes > size_bytes) { - snprintf(req->error_str, sizeof(req->error_str), - "requested LUN size %ju > backing device " - "size %ju", - (uintmax_t)params->lun_size_bytes, - (uintmax_t)size_bytes); - return (EINVAL); - } - be_lun->size_bytes = params->lun_size_bytes; - } else { - be_lun->size_bytes = size_bytes; - } - be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize; - cbe_lun->maxlba = (be_lun->size_blocks == 0) ? 
- 0 : (be_lun->size_blocks - 1); - return (0); -} - -static int ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req) { struct ctl_lun_modify_params *params; struct ctl_be_block_lun *be_lun; struct ctl_be_lun *cbe_lun; char *value; uint64_t oldsize; int error, wasprim; params = &req->reqdata.modify; mtx_lock(&softc->lock); STAILQ_FOREACH(be_lun, &softc->lun_list, links) { if (be_lun->cbe_lun.lun_id == params->lun_id) break; } mtx_unlock(&softc->lock); if (be_lun == NULL) { snprintf(req->error_str, sizeof(req->error_str), "LUN %u is not managed by the block backend", params->lun_id); goto bailout_error; } cbe_lun = &be_lun->cbe_lun; if (params->lun_size_bytes != 0) be_lun->params.lun_size_bytes = params->lun_size_bytes; ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args); wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY); value = ctl_get_opt(&cbe_lun->options, "ha_role"); if (value != NULL) { if (strcmp(value, "primary") == 0) cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; else cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; } else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF) cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY; else cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY; if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) { if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ctl_lun_primary(cbe_lun); else ctl_lun_secondary(cbe_lun); } oldsize = be_lun->size_blocks; if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) || control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) { if (be_lun->vn == NULL) error = ctl_be_block_open(softc, be_lun, req); else if (vn_isdisk(be_lun->vn, &error)) - error = ctl_be_block_modify_dev(be_lun, req); + error = ctl_be_block_open_dev(be_lun, req); else if (be_lun->vn->v_type == VREG) - error = ctl_be_block_modify_file(be_lun, req); + error = ctl_be_block_open_file(be_lun, req); else error = EINVAL; if ((cbe_lun->flags & CTL_LUN_FLAG_OFFLINE) && be_lun->vn != NULL) { cbe_lun->flags &= ~CTL_LUN_FLAG_OFFLINE; ctl_lun_online(cbe_lun); } } else { if (be_lun->vn != NULL) { cbe_lun->flags |= CTL_LUN_FLAG_OFFLINE; ctl_lun_offline(cbe_lun); taskqueue_drain_all(be_lun->io_taskqueue); error = ctl_be_block_close(be_lun); } else error = 0; } if (be_lun->size_blocks != oldsize) ctl_lun_capacity_changed(cbe_lun); /* Tell the user the exact size we ended up using */ params->lun_size_bytes = be_lun->size_bytes; req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK; return (0); bailout_error: req->status = CTL_LUN_ERROR; return (0); } static void ctl_be_block_lun_shutdown(void *be_lun) { struct ctl_be_block_lun *lun; struct ctl_be_block_softc *softc; lun = (struct ctl_be_block_lun *)be_lun; softc = lun->softc; mtx_lock(&softc->lock); lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED; if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) wakeup(lun); mtx_unlock(&softc->lock); } static void ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status) { struct ctl_be_block_lun *lun; struct ctl_be_block_softc *softc; lun = (struct ctl_be_block_lun *)be_lun; softc = lun->softc; if (status == CTL_LUN_CONFIG_OK) { mtx_lock(&softc->lock); lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; if (lun->flags & CTL_BE_BLOCK_LUN_WAITING) wakeup(lun); mtx_unlock(&softc->lock); /* * We successfully added the LUN, attempt to enable it. 
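 *
 * In sketch form, the policy implemented below is: if ctl_enable_lun()
 * fails, fall back to ctl_invalidate_lun() as a best-effort teardown,
 * and merely printf() either failure -- the creating thread has already
 * been woken, so there is no caller left to return an error to, and a
 * LUN that cannot be enabled must not be left half-alive.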
*/ if (ctl_enable_lun(&lun->cbe_lun) != 0) { printf("%s: ctl_enable_lun() failed!\n", __func__); if (ctl_invalidate_lun(&lun->cbe_lun) != 0) { printf("%s: ctl_invalidate_lun() failed!\n", __func__); } } return; } mtx_lock(&softc->lock); lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED; lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR; wakeup(lun); mtx_unlock(&softc->lock); } static int ctl_be_block_config_write(union ctl_io *io) { struct ctl_be_block_lun *be_lun; struct ctl_be_lun *cbe_lun; int retval; retval = 0; DPRINTF("entered\n"); cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ CTL_PRIV_BACKEND_LUN].ptr; be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun; switch (io->scsiio.cdb[0]) { case SYNCHRONIZE_CACHE: case SYNCHRONIZE_CACHE_16: case WRITE_SAME_10: case WRITE_SAME_16: case UNMAP: /* * The upper level CTL code will filter out any CDBs with * the immediate bit set and return the proper error. * * We don't really need to worry about what LBA range the * user asked to be synced out. When they issue a sync * cache command, we'll sync out the whole thing. */ mtx_lock(&be_lun->queue_lock); STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); break; case START_STOP_UNIT: { struct scsi_start_stop_unit *cdb; cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb; if (cdb->how & SSS_START) retval = ctl_start_lun(cbe_lun); else { retval = ctl_stop_lun(cbe_lun); /* * XXX KDM Copan-specific offline behavior. * Figure out a reasonable way to port this? */ #ifdef NEEDTOPORT if ((retval == 0) && (cdb->byte2 & SSS_ONOFFLINE)) retval = ctl_lun_offline(cbe_lun); #endif } /* * In general, the above routines should not fail. They * just set state for the LUN. So we've got something * pretty wrong here if we can't start or stop the LUN. 
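 *
 * Decode sketch of the CDB handling above: "how" is byte 4 of the
 * START STOP UNIT CDB, so (cdb->how & SSS_START) selects between
 * ctl_start_lun() and ctl_stop_lun(); apart from the Copan-specific
 * byte2 handling under NEEDTOPORT, the rest of the CDB is ignored by
 * this backend.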
*/ if (retval != 0) { ctl_set_internal_failure(&io->scsiio, /*sks_valid*/ 1, /*retry_count*/ 0xf051); retval = CTL_RETVAL_COMPLETE; } else { ctl_set_success(&io->scsiio); } ctl_config_write_done(io); break; } default: ctl_set_invalid_opcode(&io->scsiio); ctl_config_write_done(io); retval = CTL_RETVAL_COMPLETE; break; } return (retval); } static int ctl_be_block_config_read(union ctl_io *io) { struct ctl_be_block_lun *be_lun; struct ctl_be_lun *cbe_lun; int retval = 0; DPRINTF("entered\n"); cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[ CTL_PRIV_BACKEND_LUN].ptr; be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun; switch (io->scsiio.cdb[0]) { case SERVICE_ACTION_IN: if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) { mtx_lock(&be_lun->queue_lock); STAILQ_INSERT_TAIL(&be_lun->config_read_queue, &io->io_hdr, links); mtx_unlock(&be_lun->queue_lock); taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task); retval = CTL_RETVAL_QUEUED; break; } ctl_set_invalid_field(&io->scsiio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 4); ctl_config_read_done(io); retval = CTL_RETVAL_COMPLETE; break; default: ctl_set_invalid_opcode(&io->scsiio); ctl_config_read_done(io); retval = CTL_RETVAL_COMPLETE; break; } return (retval); } static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb) { struct ctl_be_block_lun *lun; int retval; lun = (struct ctl_be_block_lun *)be_lun; retval = 0; retval = sbuf_printf(sb, "\t"); if (retval != 0) goto bailout; retval = sbuf_printf(sb, "%d", lun->num_threads); if (retval != 0) goto bailout; retval = sbuf_printf(sb, "\n"); bailout: return (retval); } static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname) { struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)be_lun; if (lun->getattr == NULL) return (UINT64_MAX); return (lun->getattr(lun, attrname)); } int ctl_be_block_init(void) { struct ctl_be_block_softc *softc; int retval; softc = &backend_block_softc; retval = 0; mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF); beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); STAILQ_INIT(&softc->lun_list); return (retval); } Index: projects/clang370-import/sys/cam/ctl/ctl_private.h =================================================================== --- projects/clang370-import/sys/cam/ctl/ctl_private.h (revision 288125) +++ projects/clang370-import/sys/cam/ctl/ctl_private.h (revision 288126) @@ -1,539 +1,543 @@ /*- * Copyright (c) 2003, 2004, 2005, 2008 Silicon Graphics International Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_private.h#7 $ * $FreeBSD$ */ /* * CAM Target Layer driver private data structures/definitions. * * Author: Ken Merry */ #ifndef _CTL_PRIVATE_H_ #define _CTL_PRIVATE_H_ /* * SCSI vendor and product names. */ #define CTL_VENDOR "FREEBSD " #define CTL_DIRECT_PRODUCT "CTLDISK " #define CTL_PROCESSOR_PRODUCT "CTLPROCESSOR " #define CTL_UNKNOWN_PRODUCT "CTLDEVICE " typedef enum { CTL_IOCTL_INPROG, CTL_IOCTL_DATAMOVE, CTL_IOCTL_DONE } ctl_fe_ioctl_state; struct ctl_fe_ioctl_params { struct cv sem; struct mtx ioctl_mtx; ctl_fe_ioctl_state state; }; #define CTL_POOL_ENTRIES_OTHER_SC 200 struct ctl_io_pool { char name[64]; uint32_t id; struct ctl_softc *ctl_softc; struct uma_zone *zone; }; typedef enum { CTL_SER_BLOCK, CTL_SER_BLOCKOPT, CTL_SER_EXTENT, CTL_SER_EXTENTOPT, CTL_SER_EXTENTSEQ, CTL_SER_PASS, CTL_SER_SKIP } ctl_serialize_action; typedef enum { CTL_ACTION_BLOCK, CTL_ACTION_OVERLAP, CTL_ACTION_OVERLAP_TAG, CTL_ACTION_PASS, CTL_ACTION_SKIP, CTL_ACTION_ERROR } ctl_action; /* * WARNING: Keep the bottom nibble here free, we OR in the data direction * flags for each command. * * Note: "OK_ON_NO_LUN" == we don't have to have a lun configured * "OK_ON_BOTH" == we have to have a lun configured * "SA5" == command has 5-bit service action at byte 1 */ typedef enum { CTL_CMD_FLAG_NONE = 0x0000, CTL_CMD_FLAG_NO_SENSE = 0x0010, CTL_CMD_FLAG_OK_ON_NO_LUN = 0x0020, CTL_CMD_FLAG_ALLOW_ON_RESV = 0x0040, CTL_CMD_FLAG_ALLOW_ON_PR_WRESV = 0x0080, CTL_CMD_FLAG_OK_ON_PROC = 0x0100, CTL_CMD_FLAG_OK_ON_SLUN = 0x0200, CTL_CMD_FLAG_OK_ON_BOTH = 0x0300, CTL_CMD_FLAG_OK_ON_STOPPED = 0x0400, CTL_CMD_FLAG_OK_ON_INOPERABLE = 0x0800, CTL_CMD_FLAG_OK_ON_STANDBY = 0x1000, CTL_CMD_FLAG_OK_ON_UNAVAIL = 0x2000, CTL_CMD_FLAG_ALLOW_ON_PR_RESV = 0x4000, CTL_CMD_FLAG_SA5 = 0x8000, CTL_CMD_FLAG_RUN_HERE = 0x10000 } ctl_cmd_flags; typedef enum { CTL_SERIDX_TUR = 0, CTL_SERIDX_READ, CTL_SERIDX_WRITE, CTL_SERIDX_UNMAP, CTL_SERIDX_SYNC, CTL_SERIDX_MD_SNS, CTL_SERIDX_MD_SEL, CTL_SERIDX_RQ_SNS, CTL_SERIDX_INQ, CTL_SERIDX_RD_CAP, CTL_SERIDX_RES, CTL_SERIDX_LOG_SNS, CTL_SERIDX_FORMAT, CTL_SERIDX_START, /* TBD: others to be filled in as needed */ CTL_SERIDX_COUNT, /* LAST, not a normal code, provides # codes */ CTL_SERIDX_INVLD = CTL_SERIDX_COUNT } ctl_seridx; typedef int ctl_opfunc(struct ctl_scsiio *ctsio); struct ctl_cmd_entry { ctl_opfunc *execute; ctl_seridx seridx; ctl_cmd_flags flags; ctl_lun_error_pattern pattern; uint8_t length; /* CDB length */ uint8_t usage[15]; /* Mask of allowed CDB bits * after the opcode byte. 
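 *
 * One plausible use, matching this
 * description (a sketch; "e", "cdb"
 * and "i" are invented locals):
 *
 *	for (i = 1; i < e->length; i++)
 *		if (cdb[i] & ~e->usage[i - 1])
 *			return (EINVAL);
 *
 * i.e. any CDB bit not set in the
 * mask is treated as a reserved bit
 * that must be zero.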
*/ }; typedef enum { CTL_LUN_NONE = 0x000, CTL_LUN_CONTROL = 0x001, CTL_LUN_RESERVED = 0x002, CTL_LUN_INVALID = 0x004, CTL_LUN_DISABLED = 0x008, CTL_LUN_MALLOCED = 0x010, CTL_LUN_STOPPED = 0x020, CTL_LUN_INOPERABLE = 0x040, CTL_LUN_OFFLINE = 0x080, CTL_LUN_PR_RESERVED = 0x100, CTL_LUN_PRIMARY_SC = 0x200, CTL_LUN_SENSE_DESC = 0x400, CTL_LUN_READONLY = 0x800, CTL_LUN_PEER_SC_PRIMARY = 0x1000 } ctl_lun_flags; typedef enum { CTLBLOCK_FLAG_NONE = 0x00, CTLBLOCK_FLAG_INVALID = 0x01 } ctlblock_flags; union ctl_softcs { struct ctl_softc *ctl_softc; struct ctlblock_softc *ctlblock_softc; }; /* * Mode page defaults. */ #if 0 /* * These values make Solaris trim off some of the capacity. */ #define CTL_DEFAULT_SECTORS_PER_TRACK 63 #define CTL_DEFAULT_HEADS 255 /* * These values seem to work okay. */ #define CTL_DEFAULT_SECTORS_PER_TRACK 63 #define CTL_DEFAULT_HEADS 16 /* * These values work reasonably well. */ #define CTL_DEFAULT_SECTORS_PER_TRACK 512 #define CTL_DEFAULT_HEADS 64 #endif /* * Solaris is somewhat picky about how many heads and sectors per track you * have defined in mode pages 3 and 4. These values seem to cause Solaris * to get the capacity more or less right when you run the format tool. * They still have problems when dealing with devices larger than 1TB, * but there isn't anything we can do about that. * * For smaller LUN sizes, this ends up causing the number of cylinders to * work out to 0. Solaris actually recognizes that and comes up with its * own bogus geometry to fit the actual capacity of the drive. They really * should just give up on geometry and stick to the read capacity * information alone for modern disk drives. * * One thing worth mentioning about Solaris' mkfs command is that it * doesn't like sectors per track values larger than 256. 512 seems to * work okay for format, but causes problems when you try to make a * filesystem. * * Another caveat about these values: the product of these two values * really should be a power of 2. This is because of the simplistic * shift-based calculation that we have to use on the i386 platform to * calculate the number of cylinders here. (If you use a divide, you end * up calling __udivdi3(), which is a hardware FP call on the PC. On the * XScale, it is done in software, so you can do that from inside the * kernel.) * * So for the current values (256 S/T, 128 H), we get 32768, which works * very nicely for calculating cylinders. * * If you want to change these values so that their product is no longer a * power of 2, re-visit the calculation in ctl_init_page_index(). You may * need to make it a bit more complicated to get the number of cylinders * right. 
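 *
 * Worked example of the shift-based calculation, for illustration:
 * with 256 sectors/track and 128 heads the product is 32768 = 2^15,
 * so
 *
 *	cylinders = total_sectors >> 15;
 *
 * and a 100 GB LUN with 512-byte sectors (195312500 sectors) comes
 * out at 5960 cylinders, with no 64-bit divide involved.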
*/ #define CTL_DEFAULT_SECTORS_PER_TRACK 256 #define CTL_DEFAULT_HEADS 128 #define CTL_DEFAULT_ROTATION_RATE SVPD_NON_ROTATING struct ctl_page_index; typedef int ctl_modesen_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, int pc); typedef int ctl_modesel_handler(struct ctl_scsiio *ctsio, struct ctl_page_index *page_index, uint8_t *page_ptr); typedef enum { CTL_PAGE_FLAG_NONE = 0x00, CTL_PAGE_FLAG_DISK_ONLY = 0x01 } ctl_page_flags; struct ctl_page_index { uint8_t page_code; uint8_t subpage; uint16_t page_len; uint8_t *page_data; ctl_page_flags page_flags; ctl_modesen_handler *sense_handler; ctl_modesel_handler *select_handler; }; #define CTL_PAGE_CURRENT 0x00 #define CTL_PAGE_CHANGEABLE 0x01 #define CTL_PAGE_DEFAULT 0x02 #define CTL_PAGE_SAVED 0x03 #define CTL_NUM_LBP_PARAMS 4 #define CTL_NUM_LBP_THRESH 4 #define CTL_LBP_EXPONENT 11 /* 2048 sectors */ #define CTL_LBP_PERIOD 10 /* 10 seconds */ #define CTL_LBP_UA_PERIOD 300 /* 5 minutes */ struct ctl_logical_block_provisioning_page { struct scsi_logical_block_provisioning_page main; struct scsi_logical_block_provisioning_page_descr descr[CTL_NUM_LBP_THRESH]; }; static const struct ctl_page_index page_index_template[] = { {SMS_RW_ERROR_RECOVERY_PAGE, 0, sizeof(struct scsi_da_rw_recovery_page), NULL, CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL}, {SMS_FORMAT_DEVICE_PAGE, 0, sizeof(struct scsi_format_page), NULL, CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL}, {SMS_RIGID_DISK_PAGE, 0, sizeof(struct scsi_rigid_disk_page), NULL, CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL}, {SMS_CACHING_PAGE, 0, sizeof(struct scsi_caching_page), NULL, CTL_PAGE_FLAG_DISK_ONLY, NULL, ctl_caching_sp_handler}, {SMS_CONTROL_MODE_PAGE, 0, sizeof(struct scsi_control_page), NULL, CTL_PAGE_FLAG_NONE, NULL, ctl_control_page_handler}, + {SMS_CONTROL_MODE_PAGE | SMPH_SPF, 0x01, + sizeof(struct scsi_control_ext_page), NULL, + CTL_PAGE_FLAG_NONE, NULL, NULL}, {SMS_INFO_EXCEPTIONS_PAGE, 0, sizeof(struct scsi_info_exceptions_page), NULL, CTL_PAGE_FLAG_NONE, NULL, NULL}, {SMS_INFO_EXCEPTIONS_PAGE | SMPH_SPF, 0x02, sizeof(struct ctl_logical_block_provisioning_page), NULL, CTL_PAGE_FLAG_DISK_ONLY, NULL, NULL}, {SMS_VENDOR_SPECIFIC_PAGE | SMPH_SPF, DBGCNF_SUBPAGE_CODE, sizeof(struct copan_debugconf_subpage), NULL, CTL_PAGE_FLAG_NONE, ctl_debugconf_sp_sense_handler, ctl_debugconf_sp_select_handler}, }; #define CTL_NUM_MODE_PAGES sizeof(page_index_template)/ \ sizeof(page_index_template[0]) struct ctl_mode_pages { struct scsi_da_rw_recovery_page rw_er_page[4]; struct scsi_format_page format_page[4]; struct scsi_rigid_disk_page rigid_disk_page[4]; struct scsi_caching_page caching_page[4]; struct scsi_control_page control_page[4]; + struct scsi_control_ext_page control_ext_page[4]; struct scsi_info_exceptions_page ie_page[4]; struct ctl_logical_block_provisioning_page lbp_page[4]; struct copan_debugconf_subpage debugconf_subpage[4]; struct ctl_page_index index[CTL_NUM_MODE_PAGES]; }; static const struct ctl_page_index log_page_index_template[] = { {SLS_SUPPORTED_PAGES_PAGE, 0, 0, NULL, CTL_PAGE_FLAG_NONE, NULL, NULL}, {SLS_SUPPORTED_PAGES_PAGE, SLS_SUPPORTED_SUBPAGES_SUBPAGE, 0, NULL, CTL_PAGE_FLAG_NONE, NULL, NULL}, {SLS_LOGICAL_BLOCK_PROVISIONING, 0, 0, NULL, CTL_PAGE_FLAG_NONE, ctl_lbp_log_sense_handler, NULL}, {SLS_STAT_AND_PERF, 0, 0, NULL, CTL_PAGE_FLAG_NONE, ctl_sap_log_sense_handler, NULL}, }; #define CTL_NUM_LOG_PAGES sizeof(log_page_index_template)/ \ sizeof(log_page_index_template[0]) struct ctl_log_pages { uint8_t pages_page[CTL_NUM_LOG_PAGES]; uint8_t 
subpages_page[CTL_NUM_LOG_PAGES * 2]; uint8_t lbp_page[12*CTL_NUM_LBP_PARAMS]; struct stat_page { struct scsi_log_stat_and_perf sap; struct scsi_log_idle_time it; struct scsi_log_time_interval ti; } stat_page; struct ctl_page_index index[CTL_NUM_LOG_PAGES]; }; struct ctl_lun_delay_info { ctl_delay_type datamove_type; uint32_t datamove_delay; ctl_delay_type done_type; uint32_t done_delay; }; typedef enum { CTL_ERR_INJ_NONE = 0x00, CTL_ERR_INJ_ABORTED = 0x01 } ctl_err_inject_flags; typedef enum { CTL_PR_FLAG_NONE = 0x00, CTL_PR_FLAG_REGISTERED = 0x01, CTL_PR_FLAG_ACTIVE_RES = 0x02 } ctl_per_res_flags; #define CTL_PR_ALL_REGISTRANTS 0xFFFFFFFF #define CTL_PR_NO_RESERVATION 0xFFFFFFF0 struct ctl_devid { int len; uint8_t data[]; }; /* * For report target port groups. */ #define NUM_TARGET_PORT_GROUPS 2 #define CTL_WRITE_BUFFER_SIZE 262144 struct tpc_list; struct ctl_lun { struct mtx lun_lock; uint64_t lun; ctl_lun_flags flags; STAILQ_HEAD(,ctl_error_desc) error_list; uint64_t error_serial; struct ctl_softc *ctl_softc; struct ctl_be_lun *be_lun; struct ctl_backend_driver *backend; int io_count; struct ctl_lun_delay_info delay_info; int sync_interval; int sync_count; #ifdef CTL_TIME_IO sbintime_t idle_time; sbintime_t last_busy; #endif TAILQ_HEAD(ctl_ooaq, ctl_io_hdr) ooa_queue; TAILQ_HEAD(ctl_blockq,ctl_io_hdr) blocked_queue; STAILQ_ENTRY(ctl_lun) links; STAILQ_ENTRY(ctl_lun) run_links; #ifdef CTL_WITH_CA uint32_t have_ca[CTL_MAX_INITIATORS >> 5]; struct scsi_sense_data pending_sense[CTL_MAX_INITIATORS]; #endif ctl_ua_type *pending_ua[CTL_MAX_PORTS]; uint8_t ua_tpt_info[8]; time_t lasttpt; struct ctl_mode_pages mode_pages; struct ctl_log_pages log_pages; struct ctl_lun_io_stats stats; uint32_t res_idx; unsigned int PRGeneration; uint64_t *pr_keys[CTL_MAX_PORTS]; int pr_key_count; uint32_t pr_res_idx; uint8_t res_type; uint8_t *write_buffer; struct ctl_devid *lun_devid; TAILQ_HEAD(tpc_lists, tpc_list) tpc_lists; }; typedef enum { CTL_FLAG_REAL_SYNC = 0x02, CTL_FLAG_ACTIVE_SHELF = 0x04 } ctl_gen_flags; #define CTL_MAX_THREADS 16 struct ctl_thread { struct mtx_padalign queue_lock; struct ctl_softc *ctl_softc; struct thread *thread; STAILQ_HEAD(, ctl_io_hdr) incoming_queue; STAILQ_HEAD(, ctl_io_hdr) rtr_queue; STAILQ_HEAD(, ctl_io_hdr) done_queue; STAILQ_HEAD(, ctl_io_hdr) isc_queue; }; struct tpc_token; struct ctl_softc { struct mtx ctl_lock; struct cdev *dev; int open_count; int num_disks; int num_luns; ctl_gen_flags flags; ctl_ha_mode ha_mode; int ha_id; int is_single; ctl_ha_link_state ha_link; int port_min; int port_max; int port_cnt; int init_min; int init_max; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; void *othersc_pool; struct proc *ctl_proc; int targ_online; uint32_t ctl_lun_mask[(CTL_MAX_LUNS + 31) / 32]; struct ctl_lun *ctl_luns[CTL_MAX_LUNS]; uint32_t ctl_port_mask[(CTL_MAX_PORTS + 31) / 32]; STAILQ_HEAD(, ctl_lun) lun_list; STAILQ_HEAD(, ctl_be_lun) pending_lun_queue; uint32_t num_frontends; STAILQ_HEAD(, ctl_frontend) fe_list; uint32_t num_ports; STAILQ_HEAD(, ctl_port) port_list; struct ctl_port *ctl_ports[CTL_MAX_PORTS]; uint32_t num_backends; STAILQ_HEAD(, ctl_backend_driver) be_list; struct uma_zone *io_zone; uint32_t cur_pool_id; struct ctl_thread threads[CTL_MAX_THREADS]; TAILQ_HEAD(tpc_tokens, tpc_token) tpc_tokens; struct callout tpc_timeout; struct mtx tpc_lock; }; #ifdef _KERNEL extern const struct ctl_cmd_entry ctl_cmd_table[256]; uint32_t ctl_get_initindex(struct ctl_nexus *nexus); int ctl_lun_map_init(struct ctl_port *port); int 
ctl_lun_map_deinit(struct ctl_port *port); int ctl_lun_map_set(struct ctl_port *port, uint32_t plun, uint32_t glun); int ctl_lun_map_unset(struct ctl_port *port, uint32_t plun); uint32_t ctl_lun_map_from_port(struct ctl_port *port, uint32_t plun); uint32_t ctl_lun_map_to_port(struct ctl_port *port, uint32_t glun); int ctl_pool_create(struct ctl_softc *ctl_softc, const char *pool_name, uint32_t total_ctl_io, void **npool); void ctl_pool_free(struct ctl_io_pool *pool); int ctl_scsi_release(struct ctl_scsiio *ctsio); int ctl_scsi_reserve(struct ctl_scsiio *ctsio); int ctl_start_stop(struct ctl_scsiio *ctsio); int ctl_sync_cache(struct ctl_scsiio *ctsio); int ctl_format(struct ctl_scsiio *ctsio); int ctl_read_buffer(struct ctl_scsiio *ctsio); int ctl_write_buffer(struct ctl_scsiio *ctsio); int ctl_write_same(struct ctl_scsiio *ctsio); int ctl_unmap(struct ctl_scsiio *ctsio); int ctl_mode_select(struct ctl_scsiio *ctsio); int ctl_mode_sense(struct ctl_scsiio *ctsio); int ctl_log_sense(struct ctl_scsiio *ctsio); int ctl_read_capacity(struct ctl_scsiio *ctsio); int ctl_read_capacity_16(struct ctl_scsiio *ctsio); int ctl_read_defect(struct ctl_scsiio *ctsio); int ctl_read_write(struct ctl_scsiio *ctsio); int ctl_cnw(struct ctl_scsiio *ctsio); int ctl_report_luns(struct ctl_scsiio *ctsio); int ctl_request_sense(struct ctl_scsiio *ctsio); int ctl_tur(struct ctl_scsiio *ctsio); int ctl_verify(struct ctl_scsiio *ctsio); int ctl_inquiry(struct ctl_scsiio *ctsio); int ctl_persistent_reserve_in(struct ctl_scsiio *ctsio); int ctl_persistent_reserve_out(struct ctl_scsiio *ctsio); int ctl_report_tagret_port_groups(struct ctl_scsiio *ctsio); int ctl_report_supported_opcodes(struct ctl_scsiio *ctsio); int ctl_report_supported_tmf(struct ctl_scsiio *ctsio); int ctl_report_timestamp(struct ctl_scsiio *ctsio); int ctl_get_lba_status(struct ctl_scsiio *ctsio); void ctl_tpc_init(struct ctl_softc *softc); void ctl_tpc_shutdown(struct ctl_softc *softc); void ctl_tpc_lun_init(struct ctl_lun *lun); void ctl_tpc_lun_shutdown(struct ctl_lun *lun); int ctl_inquiry_evpd_tpc(struct ctl_scsiio *ctsio, int alloc_len); int ctl_receive_copy_status_lid1(struct ctl_scsiio *ctsio); int ctl_receive_copy_failure_details(struct ctl_scsiio *ctsio); int ctl_receive_copy_status_lid4(struct ctl_scsiio *ctsio); int ctl_receive_copy_operating_parameters(struct ctl_scsiio *ctsio); int ctl_extended_copy_lid1(struct ctl_scsiio *ctsio); int ctl_extended_copy_lid4(struct ctl_scsiio *ctsio); int ctl_copy_operation_abort(struct ctl_scsiio *ctsio); int ctl_populate_token(struct ctl_scsiio *ctsio); int ctl_write_using_token(struct ctl_scsiio *ctsio); int ctl_receive_rod_token_information(struct ctl_scsiio *ctsio); int ctl_report_all_rod_tokens(struct ctl_scsiio *ctsio); #endif /* _KERNEL */ #endif /* _CTL_PRIVATE_H_ */ /* * vim: ts=8 */ Index: projects/clang370-import/sys/cam/scsi/scsi_all.h =================================================================== --- projects/clang370-import/sys/cam/scsi/scsi_all.h (revision 288125) +++ projects/clang370-import/sys/cam/scsi/scsi_all.h (revision 288126) @@ -1,4147 +1,4160 @@ /*- * Largely written by Julian Elischer (julian@tfs.com) * for TRW Financial Systems. * * TRW Financial Systems, in accordance with their agreement with Carnegie * Mellon University, makes this software available to CMU to distribute * or use in any manner that they see fit as long as this message is kept with * the software. 
For this reason TFS also grants any other persons or * organisations permission to use or modify this software. * * TFS supplies this software to be publicly redistributed * on the understanding that TFS is not responsible for the correct * functioning of this software in any circumstances. * * Ported to run under 386BSD by Julian Elischer (julian@tfs.com) Sept 1992 * * $FreeBSD$ */ /* * SCSI general interface description */ #ifndef _SCSI_SCSI_ALL_H #define _SCSI_SCSI_ALL_H 1 #include #include #ifdef _KERNEL /* * This is the number of seconds we wait for devices to settle after a SCSI * bus reset. */ extern int scsi_delay; #endif /* _KERNEL */ /* * SCSI command format */ /* * Define dome bits that are in ALL (or a lot of) scsi commands */ #define SCSI_CTL_LINK 0x01 #define SCSI_CTL_FLAG 0x02 #define SCSI_CTL_VENDOR 0xC0 #define SCSI_CMD_LUN 0xA0 /* these two should not be needed */ #define SCSI_CMD_LUN_SHIFT 5 /* LUN in the cmd is no longer SCSI */ #define SCSI_MAX_CDBLEN 16 /* * 16 byte commands are in the * SCSI-3 spec */ #if defined(CAM_MAX_CDBLEN) && (CAM_MAX_CDBLEN < SCSI_MAX_CDBLEN) #error "CAM_MAX_CDBLEN cannot be less than SCSI_MAX_CDBLEN" #endif /* 6byte CDBs special case 0 length to be 256 */ #define SCSI_CDB6_LEN(len) ((len) == 0 ? 256 : len) /* * This type defines actions to be taken when a particular sense code is * received. Right now, these flags are only defined to take up 16 bits, * but can be expanded in the future if necessary. */ typedef enum { SS_NOP = 0x000000, /* Do nothing */ SS_RETRY = 0x010000, /* Retry the command */ SS_FAIL = 0x020000, /* Bail out */ SS_START = 0x030000, /* Send a Start Unit command to the device, * then retry the original command. */ SS_TUR = 0x040000, /* Send a Test Unit Ready command to the * device, then retry the original command. */ SS_MASK = 0xff0000 } scsi_sense_action; typedef enum { SSQ_NONE = 0x0000, SSQ_DECREMENT_COUNT = 0x0100, /* Decrement the retry count */ SSQ_MANY = 0x0200, /* send lots of recovery commands */ SSQ_RANGE = 0x0400, /* * This table entry represents the * end of a range of ASCQs that * have identical error actions * and text. */ SSQ_PRINT_SENSE = 0x0800, SSQ_UA = 0x1000, /* Broadcast UA. */ SSQ_RESCAN = 0x2000, /* Rescan target for LUNs. */ SSQ_LOST = 0x4000, /* Destroy the LUNs. 
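 *
 * A complete error action ORs together one SS_* action, any of these
 * SSQ_* qualifiers, and an errno value in the low byte; the three
 * fields are recovered with SS_MASK, SSQ_MASK and SS_ERRMASK.  For
 * example, with SS_RDEF as defined below:
 *
 *	(SS_RDEF) & SS_MASK	== SS_RETRY
 *	(SS_RDEF) & SS_ERRMASK	== EIO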
*/ SSQ_MASK = 0xff00 } scsi_sense_action_qualifier; /* Mask for error status values */ #define SS_ERRMASK 0xff /* The default, retyable, error action */ #define SS_RDEF SS_RETRY|SSQ_DECREMENT_COUNT|SSQ_PRINT_SENSE|EIO /* The retyable, error action, with table specified error code */ #define SS_RET SS_RETRY|SSQ_DECREMENT_COUNT|SSQ_PRINT_SENSE /* Wait for transient error status to change */ #define SS_WAIT SS_TUR|SSQ_MANY|SSQ_DECREMENT_COUNT|SSQ_PRINT_SENSE /* Fatal error action, with table specified error code */ #define SS_FATAL SS_FAIL|SSQ_PRINT_SENSE struct scsi_generic { u_int8_t opcode; u_int8_t bytes[11]; }; struct scsi_request_sense { u_int8_t opcode; u_int8_t byte2; #define SRS_DESC 0x01 u_int8_t unused[2]; u_int8_t length; u_int8_t control; }; struct scsi_test_unit_ready { u_int8_t opcode; u_int8_t byte2; u_int8_t unused[3]; u_int8_t control; }; struct scsi_receive_diag { uint8_t opcode; uint8_t byte2; #define SRD_PCV 0x01 uint8_t page_code; uint8_t length[2]; uint8_t control; }; struct scsi_send_diag { uint8_t opcode; uint8_t byte2; #define SSD_UNITOFFL 0x01 #define SSD_DEVOFFL 0x02 #define SSD_SELFTEST 0x04 #define SSD_PF 0x10 #define SSD_SELF_TEST_CODE_MASK 0xE0 #define SSD_SELF_TEST_CODE_SHIFT 5 #define SSD_SELF_TEST_CODE_NONE 0x00 #define SSD_SELF_TEST_CODE_BG_SHORT 0x01 #define SSD_SELF_TEST_CODE_BG_EXTENDED 0x02 #define SSD_SELF_TEST_CODE_BG_ABORT 0x04 #define SSD_SELF_TEST_CODE_FG_SHORT 0x05 #define SSD_SELF_TEST_CODE_FG_EXTENDED 0x06 uint8_t reserved; uint8_t length[2]; uint8_t control; }; struct scsi_sense { u_int8_t opcode; u_int8_t byte2; u_int8_t unused[2]; u_int8_t length; u_int8_t control; }; struct scsi_inquiry { u_int8_t opcode; u_int8_t byte2; #define SI_EVPD 0x01 #define SI_CMDDT 0x02 u_int8_t page_code; u_int8_t length[2]; u_int8_t control; }; struct scsi_mode_sense_6 { u_int8_t opcode; u_int8_t byte2; #define SMS_DBD 0x08 u_int8_t page; #define SMS_PAGE_CODE 0x3F #define SMS_VENDOR_SPECIFIC_PAGE 0x00 #define SMS_DISCONNECT_RECONNECT_PAGE 0x02 #define SMS_FORMAT_DEVICE_PAGE 0x03 #define SMS_GEOMETRY_PAGE 0x04 #define SMS_CACHE_PAGE 0x08 #define SMS_PERIPHERAL_DEVICE_PAGE 0x09 #define SMS_CONTROL_MODE_PAGE 0x0A #define SMS_PROTO_SPECIFIC_PAGE 0x19 #define SMS_INFO_EXCEPTIONS_PAGE 0x1C #define SMS_ALL_PAGES_PAGE 0x3F #define SMS_PAGE_CTRL_MASK 0xC0 #define SMS_PAGE_CTRL_CURRENT 0x00 #define SMS_PAGE_CTRL_CHANGEABLE 0x40 #define SMS_PAGE_CTRL_DEFAULT 0x80 #define SMS_PAGE_CTRL_SAVED 0xC0 u_int8_t subpage; #define SMS_SUBPAGE_PAGE_0 0x00 #define SMS_SUBPAGE_ALL 0xff u_int8_t length; u_int8_t control; }; struct scsi_mode_sense_10 { u_int8_t opcode; u_int8_t byte2; /* same bits as small version */ #define SMS10_LLBAA 0x10 u_int8_t page; /* same bits as small version */ u_int8_t subpage; u_int8_t unused[3]; u_int8_t length[2]; u_int8_t control; }; struct scsi_mode_select_6 { u_int8_t opcode; u_int8_t byte2; #define SMS_SP 0x01 #define SMS_PF 0x10 u_int8_t unused[2]; u_int8_t length; u_int8_t control; }; struct scsi_mode_select_10 { u_int8_t opcode; u_int8_t byte2; /* same bits as small version */ u_int8_t unused[5]; u_int8_t length[2]; u_int8_t control; }; /* * When sending a mode select to a tape drive, the medium type must be 0. 
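 *
 * For example (a sketch; "hdr" is a local variable invented for
 * illustration):
 *
 *	struct scsi_mode_hdr_6 hdr;
 *
 *	memset(&hdr, 0, sizeof(hdr));
 *	hdr.medium_type = 0;
 *
 * i.e. simply leave medium_type zeroed before appending the block
 * descriptor and the mode page data.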
*/ struct scsi_mode_hdr_6 { u_int8_t datalen; u_int8_t medium_type; u_int8_t dev_specific; u_int8_t block_descr_len; }; struct scsi_mode_hdr_10 { u_int8_t datalen[2]; u_int8_t medium_type; u_int8_t dev_specific; u_int8_t reserved[2]; u_int8_t block_descr_len[2]; }; struct scsi_mode_block_descr { u_int8_t density_code; u_int8_t num_blocks[3]; u_int8_t reserved; u_int8_t block_len[3]; }; struct scsi_per_res_in { u_int8_t opcode; u_int8_t action; #define SPRI_RK 0x00 #define SPRI_RR 0x01 #define SPRI_RC 0x02 #define SPRI_RS 0x03 u_int8_t reserved[5]; u_int8_t length[2]; #define SPRI_MAX_LEN 0xffff u_int8_t control; }; struct scsi_per_res_in_header { u_int8_t generation[4]; u_int8_t length[4]; }; struct scsi_per_res_key { u_int8_t key[8]; }; struct scsi_per_res_in_keys { struct scsi_per_res_in_header header; struct scsi_per_res_key keys[0]; }; struct scsi_per_res_cap { uint8_t length[2]; uint8_t flags1; #define SPRI_RLR_C 0x80 #define SPRI_CRH 0x10 #define SPRI_SIP_C 0x08 #define SPRI_ATP_C 0x04 #define SPRI_PTPL_C 0x01 uint8_t flags2; #define SPRI_TMV 0x80 #define SPRI_ALLOW_CMD_MASK 0x70 #define SPRI_ALLOW_CMD_SHIFT 4 #define SPRI_ALLOW_NA 0x00 #define SPRI_ALLOW_1 0x10 #define SPRI_ALLOW_2 0x20 #define SPRI_ALLOW_3 0x30 #define SPRI_ALLOW_4 0x40 #define SPRI_ALLOW_5 0x50 #define SPRI_PTPL_A 0x01 uint8_t type_mask[2]; #define SPRI_TM_WR_EX_AR 0x8000 #define SPRI_TM_EX_AC_RO 0x4000 #define SPRI_TM_WR_EX_RO 0x2000 #define SPRI_TM_EX_AC 0x0800 #define SPRI_TM_WR_EX 0x0200 #define SPRI_TM_EX_AC_AR 0x0001 uint8_t reserved[2]; }; struct scsi_per_res_in_rsrv_data { uint8_t reservation[8]; uint8_t scope_addr[4]; uint8_t reserved; uint8_t scopetype; #define SPRT_WE 0x01 #define SPRT_EA 0x03 #define SPRT_WERO 0x05 #define SPRT_EARO 0x06 #define SPRT_WEAR 0x07 #define SPRT_EAAR 0x08 uint8_t extent_length[2]; }; struct scsi_per_res_in_rsrv { struct scsi_per_res_in_header header; struct scsi_per_res_in_rsrv_data data; }; struct scsi_per_res_in_full_desc { struct scsi_per_res_key res_key; uint8_t reserved1[4]; uint8_t flags; #define SPRI_FULL_ALL_TG_PT 0x02 #define SPRI_FULL_R_HOLDER 0x01 uint8_t scopetype; uint8_t reserved2[4]; uint8_t rel_trgt_port_id[2]; uint8_t additional_length[4]; uint8_t transport_id[]; }; struct scsi_per_res_in_full { struct scsi_per_res_in_header header; struct scsi_per_res_in_full_desc desc[]; }; struct scsi_per_res_out { u_int8_t opcode; u_int8_t action; #define SPRO_REGISTER 0x00 #define SPRO_RESERVE 0x01 #define SPRO_RELEASE 0x02 #define SPRO_CLEAR 0x03 #define SPRO_PREEMPT 0x04 #define SPRO_PRE_ABO 0x05 #define SPRO_REG_IGNO 0x06 #define SPRO_REG_MOVE 0x07 #define SPRO_REPL_LOST_RES 0x08 #define SPRO_ACTION_MASK 0x1f u_int8_t scope_type; #define SPR_SCOPE_MASK 0xf0 #define SPR_SCOPE_SHIFT 4 #define SPR_LU_SCOPE 0x00 #define SPR_EXTENT_SCOPE 0x10 #define SPR_ELEMENT_SCOPE 0x20 #define SPR_TYPE_MASK 0x0f #define SPR_TYPE_RD_SHARED 0x00 #define SPR_TYPE_WR_EX 0x01 #define SPR_TYPE_RD_EX 0x02 #define SPR_TYPE_EX_AC 0x03 #define SPR_TYPE_SHARED 0x04 #define SPR_TYPE_WR_EX_RO 0x05 #define SPR_TYPE_EX_AC_RO 0x06 #define SPR_TYPE_WR_EX_AR 0x07 #define SPR_TYPE_EX_AC_AR 0x08 u_int8_t reserved[2]; u_int8_t length[4]; u_int8_t control; }; struct scsi_per_res_out_parms { struct scsi_per_res_key res_key; u_int8_t serv_act_res_key[8]; u_int8_t scope_spec_address[4]; u_int8_t flags; #define SPR_SPEC_I_PT 0x08 #define SPR_ALL_TG_PT 0x04 #define SPR_APTPL 0x01 u_int8_t reserved1; u_int8_t extent_length[2]; u_int8_t transport_id_list[]; }; struct scsi_per_res_out_trans_ids { u_int8_t 
additional_length[4]; u_int8_t transport_ids[]; }; /* * Used with REGISTER AND MOVE serivce action of the PERSISTENT RESERVE OUT * command. */ struct scsi_per_res_reg_move { struct scsi_per_res_key res_key; u_int8_t serv_act_res_key[8]; u_int8_t reserved; u_int8_t flags; #define SPR_REG_MOVE_UNREG 0x02 #define SPR_REG_MOVE_APTPL 0x01 u_int8_t rel_trgt_port_id[2]; u_int8_t transport_id_length[4]; u_int8_t transport_id[]; }; struct scsi_transportid_header { uint8_t format_protocol; #define SCSI_TRN_FORMAT_MASK 0xc0 #define SCSI_TRN_FORMAT_SHIFT 6 #define SCSI_TRN_PROTO_MASK 0x0f }; struct scsi_transportid_fcp { uint8_t format_protocol; #define SCSI_TRN_FCP_FORMAT_DEFAULT 0x00 uint8_t reserved1[7]; uint8_t n_port_name[8]; uint8_t reserved2[8]; }; struct scsi_transportid_spi { uint8_t format_protocol; #define SCSI_TRN_SPI_FORMAT_DEFAULT 0x00 uint8_t reserved1; uint8_t scsi_addr[2]; uint8_t obsolete[2]; uint8_t rel_trgt_port_id[2]; uint8_t reserved2[16]; }; struct scsi_transportid_1394 { uint8_t format_protocol; #define SCSI_TRN_1394_FORMAT_DEFAULT 0x00 uint8_t reserved1[7]; uint8_t eui64[8]; uint8_t reserved2[8]; }; struct scsi_transportid_rdma { uint8_t format_protocol; #define SCSI_TRN_RDMA_FORMAT_DEFAULT 0x00 uint8_t reserved[7]; #define SCSI_TRN_RDMA_PORT_LEN 16 uint8_t initiator_port_id[SCSI_TRN_RDMA_PORT_LEN]; }; struct scsi_transportid_iscsi_device { uint8_t format_protocol; #define SCSI_TRN_ISCSI_FORMAT_DEVICE 0x00 uint8_t reserved; uint8_t additional_length[2]; uint8_t iscsi_name[]; }; struct scsi_transportid_iscsi_port { uint8_t format_protocol; #define SCSI_TRN_ISCSI_FORMAT_PORT 0x40 uint8_t reserved; uint8_t additional_length[2]; uint8_t iscsi_name[]; /* * Followed by a separator and iSCSI initiator session ID */ }; struct scsi_transportid_sas { uint8_t format_protocol; #define SCSI_TRN_SAS_FORMAT_DEFAULT 0x00 uint8_t reserved1[3]; uint8_t sas_address[8]; uint8_t reserved2[12]; }; struct scsi_sop_routing_id_norm { uint8_t bus; uint8_t devfunc; #define SCSI_TRN_SOP_BUS_MAX 0xff #define SCSI_TRN_SOP_DEV_MAX 0x1f #define SCSI_TRN_SOP_DEV_MASK 0xf8 #define SCSI_TRN_SOP_DEV_SHIFT 3 #define SCSI_TRN_SOP_FUNC_NORM_MASK 0x07 #define SCSI_TRN_SOP_FUNC_NORM_MAX 0x07 }; struct scsi_sop_routing_id_alt { uint8_t bus; uint8_t function; #define SCSI_TRN_SOP_FUNC_ALT_MAX 0xff }; struct scsi_transportid_sop { uint8_t format_protocol; #define SCSI_TRN_SOP_FORMAT_DEFAULT 0x00 uint8_t reserved1; uint8_t routing_id[2]; uint8_t reserved2[20]; }; struct scsi_log_sense { u_int8_t opcode; u_int8_t byte2; #define SLS_SP 0x01 #define SLS_PPC 0x02 u_int8_t page; #define SLS_PAGE_CODE 0x3F #define SLS_SUPPORTED_PAGES_PAGE 0x00 #define SLS_OVERRUN_PAGE 0x01 #define SLS_ERROR_WRITE_PAGE 0x02 #define SLS_ERROR_READ_PAGE 0x03 #define SLS_ERROR_READREVERSE_PAGE 0x04 #define SLS_ERROR_VERIFY_PAGE 0x05 #define SLS_ERROR_NONMEDIUM_PAGE 0x06 #define SLS_ERROR_LASTN_PAGE 0x07 #define SLS_LOGICAL_BLOCK_PROVISIONING 0x0c #define SLS_SELF_TEST_PAGE 0x10 #define SLS_STAT_AND_PERF 0x19 #define SLS_IE_PAGE 0x2f #define SLS_PAGE_CTRL_MASK 0xC0 #define SLS_PAGE_CTRL_THRESHOLD 0x00 #define SLS_PAGE_CTRL_CUMULATIVE 0x40 #define SLS_PAGE_CTRL_THRESH_DEFAULT 0x80 #define SLS_PAGE_CTRL_CUMUL_DEFAULT 0xC0 u_int8_t subpage; #define SLS_SUPPORTED_SUBPAGES_SUBPAGE 0xff u_int8_t reserved; u_int8_t paramptr[2]; u_int8_t length[2]; u_int8_t control; }; struct scsi_log_select { u_int8_t opcode; u_int8_t byte2; /* SLS_SP 0x01 */ #define SLS_PCR 0x02 u_int8_t page; /* SLS_PAGE_CTRL_MASK 0xC0 */ /* SLS_PAGE_CTRL_THRESHOLD 0x00 */ /* 
SLS_PAGE_CTRL_CUMULATIVE 0x40 */ /* SLS_PAGE_CTRL_THRESH_DEFAULT 0x80 */ /* SLS_PAGE_CTRL_CUMUL_DEFAULT 0xC0 */ u_int8_t reserved[4]; u_int8_t length[2]; u_int8_t control; }; struct scsi_log_header { u_int8_t page; #define SL_PAGE_CODE 0x3F #define SL_SPF 0x40 #define SL_DS 0x80 u_int8_t subpage; u_int8_t datalen[2]; }; struct scsi_log_param_header { u_int8_t param_code[2]; u_int8_t param_control; #define SLP_LP 0x01 #define SLP_LBIN 0x02 #define SLP_TMC_MASK 0x0C #define SLP_TMC_ALWAYS 0x00 #define SLP_TMC_EQUAL 0x04 #define SLP_TMC_NOTEQUAL 0x08 #define SLP_TMC_GREATER 0x0C #define SLP_ETC 0x10 #define SLP_TSD 0x20 #define SLP_DS 0x40 #define SLP_DU 0x80 u_int8_t param_len; }; struct scsi_log_stat_and_perf { struct scsi_log_param_header hdr; #define SLP_SAP 0x0001 uint8_t read_num[8]; uint8_t write_num[8]; uint8_t recvieved_lba[8]; uint8_t transmitted_lba[8]; uint8_t read_int[8]; uint8_t write_int[8]; uint8_t weighted_num[8]; uint8_t weighted_int[8]; }; struct scsi_log_idle_time { struct scsi_log_param_header hdr; #define SLP_IT 0x0002 uint8_t idle_int[8]; }; struct scsi_log_time_interval { struct scsi_log_param_header hdr; #define SLP_TI 0x0003 uint8_t exponent[4]; uint8_t integer[4]; }; struct scsi_log_fua_stat_and_perf { struct scsi_log_param_header hdr; #define SLP_FUA_SAP 0x0004 uint8_t fua_read_num[8]; uint8_t fua_write_num[8]; uint8_t fuanv_read_num[8]; uint8_t fuanv_write_num[8]; uint8_t fua_read_int[8]; uint8_t fua_write_int[8]; uint8_t fuanv_read_int[8]; uint8_t fuanv_write_int[8]; }; struct scsi_control_page { u_int8_t page_code; u_int8_t page_length; u_int8_t rlec; #define SCP_RLEC 0x01 /*Report Log Exception Cond*/ #define SCP_GLTSD 0x02 /*Global Logging target save disable */ #define SCP_DSENSE 0x04 /*Descriptor Sense */ #define SCP_DPICZ 0x08 /*Disable Prot. Info Check if Prot. 
Field is Zero */ #define SCP_TMF_ONLY 0x10 /*TM Functions Only*/ #define SCP_TST_MASK 0xE0 /*Task Set Type Mask*/ #define SCP_TST_ONE 0x00 /*One Task Set*/ #define SCP_TST_SEPARATE 0x20 /*Separate Task Sets*/ u_int8_t queue_flags; #define SCP_QUEUE_ALG_MASK 0xF0 #define SCP_QUEUE_ALG_RESTRICTED 0x00 #define SCP_QUEUE_ALG_UNRESTRICTED 0x10 #define SCP_NUAR 0x08 /*No UA on release*/ #define SCP_QUEUE_ERR 0x02 /*Queued I/O aborted for CACs*/ #define SCP_QUEUE_DQUE 0x01 /*Queued I/O disabled*/ u_int8_t eca_and_aen; #define SCP_EECA 0x80 /*Enable Extended CA*/ #define SCP_RAC 0x40 /*Report a check*/ #define SCP_SWP 0x08 /*Software Write Protect*/ #define SCP_RAENP 0x04 /*Ready AEN Permission*/ #define SCP_UAAENP 0x02 /*UA AEN Permission*/ #define SCP_EAENP 0x01 /*Error AEN Permission*/ u_int8_t flags4; #define SCP_ATO 0x80 /*Application tag owner*/ #define SCP_TAS 0x40 /*Task aborted status*/ #define SCP_ATMPE 0x20 /*Application tag mode page*/ #define SCP_RWWP 0x10 /*Reject write without prot*/ u_int8_t aen_holdoff_period[2]; u_int8_t busy_timeout_period[2]; u_int8_t extended_selftest_completion_time[2]; }; +struct scsi_control_ext_page { + uint8_t page_code; + uint8_t subpage_code; + uint8_t page_length[2]; + uint8_t flags; +#define SCEP_TCMOS 0x04 /* Timestamp Changeable by Methods Outside Std. */ +#define SCEP_SCSIP 0x02 /* SCSI Precedence (clock) */ +#define SCEP_IALUAE 0x01 /* Implicit ALUA Enabled */ + uint8_t prio; + uint8_t max_sense; + uint8_t reserve[25]; +}; + struct scsi_cache_page { u_int8_t page_code; #define SCHP_PAGE_SAVABLE 0x80 /* Page is savable */ u_int8_t page_length; u_int8_t cache_flags; #define SCHP_FLAGS_WCE 0x04 /* Write Cache Enable */ #define SCHP_FLAGS_MF 0x02 /* Multiplication factor */ #define SCHP_FLAGS_RCD 0x01 /* Read Cache Disable */ u_int8_t rw_cache_policy; u_int8_t dis_prefetch[2]; u_int8_t min_prefetch[2]; u_int8_t max_prefetch[2]; u_int8_t max_prefetch_ceil[2]; }; /* * XXX KDM * Updated version of the cache page, as of SBC. Update this to SBC-3 and * rationalize the two. */ struct scsi_caching_page { uint8_t page_code; #define SMS_CACHING_PAGE 0x08 uint8_t page_length; uint8_t flags1; #define SCP_IC 0x80 #define SCP_ABPF 0x40 #define SCP_CAP 0x20 #define SCP_DISC 0x10 #define SCP_SIZE 0x08 #define SCP_WCE 0x04 #define SCP_MF 0x02 #define SCP_RCD 0x01 uint8_t ret_priority; uint8_t disable_pf_transfer_len[2]; uint8_t min_prefetch[2]; uint8_t max_prefetch[2]; uint8_t max_pf_ceiling[2]; uint8_t flags2; #define SCP_FSW 0x80 #define SCP_LBCSS 0x40 #define SCP_DRA 0x20 #define SCP_VS1 0x10 #define SCP_VS2 0x08 uint8_t cache_segments; uint8_t cache_seg_size[2]; uint8_t reserved; uint8_t non_cache_seg_size[3]; }; /* * XXX KDM move this off to a vendor shim.
*/ struct copan_debugconf_subpage { uint8_t page_code; #define DBGCNF_PAGE_CODE 0x00 uint8_t subpage; #define DBGCNF_SUBPAGE_CODE 0xF0 uint8_t page_length[2]; uint8_t page_version; #define DBGCNF_VERSION 0x00 uint8_t ctl_time_io_secs[2]; }; struct scsi_info_exceptions_page { u_int8_t page_code; #define SIEP_PAGE_SAVABLE 0x80 /* Page is savable */ u_int8_t page_length; u_int8_t info_flags; #define SIEP_FLAGS_PERF 0x80 #define SIEP_FLAGS_EBF 0x20 #define SIEP_FLAGS_EWASC 0x10 #define SIEP_FLAGS_DEXCPT 0x08 #define SIEP_FLAGS_TEST 0x04 #define SIEP_FLAGS_EBACKERR 0x02 #define SIEP_FLAGS_LOGERR 0x01 u_int8_t mrie; u_int8_t interval_timer[4]; u_int8_t report_count[4]; }; struct scsi_logical_block_provisioning_page_descr { uint8_t flags; #define SLBPPD_ENABLED 0x80 #define SLBPPD_TYPE_MASK 0x38 #define SLBPPD_ARMING_MASK 0x07 #define SLBPPD_ARMING_DEC 0x02 #define SLBPPD_ARMING_INC 0x01 uint8_t resource; uint8_t reserved[2]; uint8_t count[4]; }; struct scsi_logical_block_provisioning_page { uint8_t page_code; uint8_t subpage_code; uint8_t page_length[2]; uint8_t flags; #define SLBPP_SITUA 0x01 uint8_t reserved[11]; struct scsi_logical_block_provisioning_page_descr descr[0]; }; /* * SCSI protocol identifier values, current as of SPC4r36l. */ #define SCSI_PROTO_FC 0x00 /* Fibre Channel */ #define SCSI_PROTO_SPI 0x01 /* Parallel SCSI */ #define SCSI_PROTO_SSA 0x02 /* Serial Storage Arch. */ #define SCSI_PROTO_1394 0x03 /* IEEE 1394 (Firewire) */ #define SCSI_PROTO_RDMA 0x04 /* SCSI RDMA Protocol */ #define SCSI_PROTO_ISCSI 0x05 /* Internet SCSI */ #define SCSI_PROTO_iSCSI 0x05 /* Internet SCSI */ #define SCSI_PROTO_SAS 0x06 /* SAS Serial SCSI Protocol */ #define SCSI_PROTO_ADT 0x07 /* Automation/Drive Int. Trans. Prot.*/ #define SCSI_PROTO_ADITP 0x07 /* Automation/Drive Int. Trans. 
Prot.*/ #define SCSI_PROTO_ATA 0x08 /* AT Attachment Interface */ #define SCSI_PROTO_UAS 0x09 /* USB Attached SCSI */ #define SCSI_PROTO_SOP 0x0a /* SCSI over PCI Express */ #define SCSI_PROTO_NONE 0x0f /* No specific protocol */ struct scsi_proto_specific_page { u_int8_t page_code; #define SPSP_PAGE_SAVABLE 0x80 /* Page is savable */ u_int8_t page_length; u_int8_t protocol; #define SPSP_PROTO_FC SCSI_PROTO_FC #define SPSP_PROTO_SPI SCSI_PROTO_SPI #define SPSP_PROTO_SSA SCSI_PROTO_SSA #define SPSP_PROTO_1394 SCSI_PROTO_1394 #define SPSP_PROTO_RDMA SCSI_PROTO_RDMA #define SPSP_PROTO_ISCSI SCSI_PROTO_ISCSI #define SPSP_PROTO_SAS SCSI_PROTO_SAS #define SPSP_PROTO_ADT SCSI_PROTO_ADITP #define SPSP_PROTO_ATA SCSI_PROTO_ATA #define SPSP_PROTO_UAS SCSI_PROTO_UAS #define SPSP_PROTO_SOP SCSI_PROTO_SOP #define SPSP_PROTO_NONE SCSI_PROTO_NONE }; struct scsi_reserve { u_int8_t opcode; u_int8_t byte2; #define SR_EXTENT 0x01 #define SR_ID_MASK 0x0e #define SR_3RDPTY 0x10 #define SR_LUN_MASK 0xe0 u_int8_t resv_id; u_int8_t length[2]; u_int8_t control; }; struct scsi_reserve_10 { uint8_t opcode; uint8_t byte2; #define SR10_3RDPTY 0x10 #define SR10_LONGID 0x02 #define SR10_EXTENT 0x01 uint8_t resv_id; uint8_t thirdparty_id; uint8_t reserved[3]; uint8_t length[2]; uint8_t control; }; struct scsi_release { u_int8_t opcode; u_int8_t byte2; u_int8_t resv_id; u_int8_t unused[1]; u_int8_t length; u_int8_t control; }; struct scsi_release_10 { uint8_t opcode; uint8_t byte2; uint8_t resv_id; uint8_t thirdparty_id; uint8_t reserved[3]; uint8_t length[2]; uint8_t control; }; struct scsi_prevent { u_int8_t opcode; u_int8_t byte2; u_int8_t unused[2]; u_int8_t how; u_int8_t control; }; #define PR_PREVENT 0x01 #define PR_ALLOW 0x00 struct scsi_sync_cache { u_int8_t opcode; u_int8_t byte2; #define SSC_IMMED 0x02 #define SSC_RELADR 0x01 u_int8_t begin_lba[4]; u_int8_t reserved; u_int8_t lb_count[2]; u_int8_t control; }; struct scsi_sync_cache_16 { uint8_t opcode; uint8_t byte2; uint8_t begin_lba[8]; uint8_t lb_count[4]; uint8_t reserved; uint8_t control; }; struct scsi_format { uint8_t opcode; uint8_t byte2; #define SF_LONGLIST 0x20 #define SF_FMTDATA 0x10 #define SF_CMPLIST 0x08 #define SF_FORMAT_MASK 0x07 #define SF_FORMAT_BLOCK 0x00 #define SF_FORMAT_LONG_BLOCK 0x03 #define SF_FORMAT_BFI 0x04 #define SF_FORMAT_PHYS 0x05 uint8_t vendor; uint8_t interleave[2]; uint8_t control; }; struct scsi_format_header_short { uint8_t reserved; #define SF_DATA_FOV 0x80 #define SF_DATA_DPRY 0x40 #define SF_DATA_DCRT 0x20 #define SF_DATA_STPF 0x10 #define SF_DATA_IP 0x08 #define SF_DATA_DSP 0x04 #define SF_DATA_IMMED 0x02 #define SF_DATA_VS 0x01 uint8_t byte2; uint8_t defect_list_len[2]; }; struct scsi_format_header_long { uint8_t reserved; uint8_t byte2; uint8_t reserved2[2]; uint8_t defect_list_len[4]; }; struct scsi_changedef { u_int8_t opcode; u_int8_t byte2; u_int8_t unused1; u_int8_t how; u_int8_t unused[4]; u_int8_t datalen; u_int8_t control; }; struct scsi_read_buffer { u_int8_t opcode; u_int8_t byte2; #define RWB_MODE 0x1F #define RWB_MODE_HDR_DATA 0x00 #define RWB_MODE_VENDOR 0x01 #define RWB_MODE_DATA 0x02 #define RWB_MODE_DESCR 0x03 #define RWB_MODE_DOWNLOAD 0x04 #define RWB_MODE_DOWNLOAD_SAVE 0x05 #define RWB_MODE_ECHO 0x0A #define RWB_MODE_ECHO_DESCR 0x0B #define RWB_MODE_ERROR_HISTORY 0x1C u_int8_t buffer_id; u_int8_t offset[3]; u_int8_t length[3]; u_int8_t control; }; struct scsi_write_buffer { u_int8_t opcode; u_int8_t byte2; u_int8_t buffer_id; u_int8_t offset[3]; u_int8_t length[3]; u_int8_t control; }; struct
scsi_read_attribute { u_int8_t opcode; u_int8_t service_action; #define SRA_SA_ATTR_VALUES 0x00 #define SRA_SA_ATTR_LIST 0x01 #define SRA_SA_LOG_VOL_LIST 0x02 #define SRA_SA_PART_LIST 0x03 #define SRA_SA_RESTRICTED 0x04 #define SRA_SA_SUPPORTED_ATTRS 0x05 #define SRA_SA_MASK 0x1f u_int8_t element[2]; u_int8_t elem_type; u_int8_t logical_volume; u_int8_t reserved1; u_int8_t partition; u_int8_t first_attribute[2]; u_int8_t length[4]; u_int8_t cache; #define SRA_CACHE 0x01 u_int8_t control; }; struct scsi_write_attribute { u_int8_t opcode; u_int8_t byte2; #define SWA_WTC 0x01 u_int8_t element[3]; u_int8_t logical_volume; u_int8_t reserved1; u_int8_t partition; u_int8_t reserved2[2]; u_int8_t length[4]; u_int8_t reserved3; u_int8_t control; }; struct scsi_read_attribute_values { u_int8_t length[4]; u_int8_t attribute_0[0]; }; struct scsi_mam_attribute_header { u_int8_t id[2]; /* * Attributes obtained from SPC-4r36g (section 7.4.2.2) and * SSC-4r03 (section 4.2.21). */ #define SMA_ATTR_ID_DEVICE_MIN 0x0000 #define SMA_ATTR_REM_CAP_PARTITION 0x0000 #define SMA_ATTR_MAX_CAP_PARTITION 0x0001 #define SMA_ATTR_TAPEALERT_FLAGS 0x0002 #define SMA_ATTR_LOAD_COUNT 0x0003 #define SMA_ATTR_MAM_SPACE_REMAINING 0x0004 #define SMA_ATTR_DEV_ASSIGNING_ORG 0x0005 #define SMA_ATTR_FORMAT_DENSITY_CODE 0x0006 #define SMA_ATTR_INITIALIZATION_COUNT 0x0007 #define SMA_ATTR_VOLUME_ID 0x0008 #define SMA_ATTR_VOLUME_CHANGE_REF 0x0009 #define SMA_ATTR_DEV_SERIAL_LAST_LOAD 0x020a #define SMA_ATTR_DEV_SERIAL_LAST_LOAD_1 0x020b #define SMA_ATTR_DEV_SERIAL_LAST_LOAD_2 0x020c #define SMA_ATTR_DEV_SERIAL_LAST_LOAD_3 0x020d #define SMA_ATTR_TOTAL_MB_WRITTEN_LT 0x0220 #define SMA_ATTR_TOTAL_MB_READ_LT 0x0221 #define SMA_ATTR_TOTAL_MB_WRITTEN_CUR 0x0222 #define SMA_ATTR_TOTAL_MB_READ_CUR 0x0223 #define SMA_ATTR_FIRST_ENC_BLOCK 0x0224 #define SMA_ATTR_NEXT_UNENC_BLOCK 0x0225 #define SMA_ATTR_MEDIUM_USAGE_HIST 0x0340 #define SMA_ATTR_PART_USAGE_HIST 0x0341 #define SMA_ATTR_ID_DEVICE_MAX 0x03ff #define SMA_ATTR_ID_MEDIUM_MIN 0x0400 #define SMA_ATTR_MED_MANUF 0x0400 #define SMA_ATTR_MED_SERIAL 0x0401 #define SMA_ATTR_MED_LENGTH 0x0402 #define SMA_ATTR_MED_WIDTH 0x0403 #define SMA_ATTR_MED_ASSIGNING_ORG 0x0404 #define SMA_ATTR_MED_DENSITY_CODE 0x0405 #define SMA_ATTR_MED_MANUF_DATE 0x0406 #define SMA_ATTR_MAM_CAPACITY 0x0407 #define SMA_ATTR_MED_TYPE 0x0408 #define SMA_ATTR_MED_TYPE_INFO 0x0409 #define SMA_ATTR_MED_SERIAL_NUM 0x040a #define SMA_ATTR_ID_MEDIUM_MAX 0x07ff #define SMA_ATTR_ID_HOST_MIN 0x0800 #define SMA_ATTR_APP_VENDOR 0x0800 #define SMA_ATTR_APP_NAME 0x0801 #define SMA_ATTR_APP_VERSION 0x0802 #define SMA_ATTR_USER_MED_TEXT_LABEL 0x0803 #define SMA_ATTR_LAST_WRITTEN_TIME 0x0804 #define SMA_ATTR_TEXT_LOCAL_ID 0x0805 #define SMA_ATTR_BARCODE 0x0806 #define SMA_ATTR_HOST_OWNER_NAME 0x0807 #define SMA_ATTR_MEDIA_POOL 0x0808 #define SMA_ATTR_PART_USER_LABEL 0x0809 #define SMA_ATTR_LOAD_UNLOAD_AT_PART 0x080a #define SMA_ATTR_APP_FORMAT_VERSION 0x080b #define SMA_ATTR_VOL_COHERENCY_INFO 0x080c #define SMA_ATTR_ID_HOST_MAX 0x0bff #define SMA_ATTR_VENDOR_DEVICE_MIN 0x0c00 #define SMA_ATTR_VENDOR_DEVICE_MAX 0x0fff #define SMA_ATTR_VENDOR_MEDIUM_MIN 0x1000 #define SMA_ATTR_VENDOR_MEDIUM_MAX 0x13ff #define SMA_ATTR_VENDOR_HOST_MIN 0x1400 #define SMA_ATTR_VENDOR_HOST_MAX 0x17ff u_int8_t byte2; #define SMA_FORMAT_BINARY 0x00 #define SMA_FORMAT_ASCII 0x01 #define SMA_FORMAT_TEXT 0x02 #define SMA_FORMAT_MASK 0x03 #define SMA_READ_ONLY 0x80 u_int8_t length[2]; u_int8_t attribute[0]; }; struct scsi_attrib_list_header { u_int8_t length[4]; 
u_int8_t first_attr_0[0]; }; struct scsi_attrib_lv_list { u_int8_t length[2]; u_int8_t first_lv_number; u_int8_t num_logical_volumes; }; struct scsi_attrib_vendser { uint8_t vendor[8]; uint8_t serial_num[32]; }; /* * These values are used to decode the Volume Coherency Information * Attribute (0x080c) for LTFS-format coherency information. * Although the Application Client Specific lengths are different for * Version 0 and Version 1, the data is in fact the same. The length * difference was due to a code bug. */ #define SCSI_LTFS_VER0_LEN 42 #define SCSI_LTFS_VER1_LEN 43 #define SCSI_LTFS_UUID_LEN 36 #define SCSI_LTFS_STR_NAME "LTFS" #define SCSI_LTFS_STR_LEN 4 typedef enum { SCSI_ATTR_FLAG_NONE = 0x00, SCSI_ATTR_FLAG_HEX = 0x01, SCSI_ATTR_FLAG_FP = 0x02, SCSI_ATTR_FLAG_DIV_10 = 0x04, SCSI_ATTR_FLAG_FP_1DIGIT = 0x08 } scsi_attrib_flags; typedef enum { SCSI_ATTR_OUTPUT_NONE = 0x00, SCSI_ATTR_OUTPUT_TEXT_MASK = 0x03, SCSI_ATTR_OUTPUT_TEXT_RAW = 0x00, SCSI_ATTR_OUTPUT_TEXT_ESC = 0x01, SCSI_ATTR_OUTPUT_TEXT_RSV1 = 0x02, SCSI_ATTR_OUTPUT_TEXT_RSV2 = 0x03, SCSI_ATTR_OUTPUT_NONASCII_MASK = 0x0c, SCSI_ATTR_OUTPUT_NONASCII_TRIM = 0x00, SCSI_ATTR_OUTPUT_NONASCII_ESC = 0x04, SCSI_ATTR_OUTPUT_NONASCII_RAW = 0x08, SCSI_ATTR_OUTPUT_NONASCII_RSV1 = 0x0c, SCSI_ATTR_OUTPUT_FIELD_MASK = 0xf0, SCSI_ATTR_OUTPUT_FIELD_ALL = 0xf0, SCSI_ATTR_OUTPUT_FIELD_NONE = 0x00, SCSI_ATTR_OUTPUT_FIELD_DESC = 0x10, SCSI_ATTR_OUTPUT_FIELD_NUM = 0x20, SCSI_ATTR_OUTPUT_FIELD_SIZE = 0x40, SCSI_ATTR_OUTPUT_FIELD_RW = 0x80 } scsi_attrib_output_flags; struct sbuf; struct scsi_attrib_table_entry { u_int32_t id; u_int32_t flags; const char *desc; const char *suffix; int (*to_str)(struct sbuf *sb, struct scsi_mam_attribute_header *hdr, uint32_t valid_len, uint32_t flags, uint32_t output_flags, char *error_str, int error_str_len); int (*parse_str)(char *str, struct scsi_mam_attribute_header *hdr, uint32_t alloc_len, uint32_t flags, char *error_str, int error_str_len); }; struct scsi_rw_6 { u_int8_t opcode; u_int8_t addr[3]; /* only 5 bits are valid in the MSB address byte */ #define SRW_TOPADDR 0x1F u_int8_t length; u_int8_t control; }; struct scsi_rw_10 { u_int8_t opcode; #define SRW10_RELADDR 0x01 /* EBP defined for WRITE(10) only */ #define SRW10_EBP 0x04 #define SRW10_FUA 0x08 #define SRW10_DPO 0x10 u_int8_t byte2; u_int8_t addr[4]; u_int8_t reserved; u_int8_t length[2]; u_int8_t control; }; struct scsi_rw_12 { u_int8_t opcode; #define SRW12_RELADDR 0x01 #define SRW12_FUA 0x08 #define SRW12_DPO 0x10 u_int8_t byte2; u_int8_t addr[4]; u_int8_t length[4]; u_int8_t reserved; u_int8_t control; }; struct scsi_rw_16 { u_int8_t opcode; #define SRW16_RELADDR 0x01 #define SRW16_FUA 0x08 #define SRW16_DPO 0x10 u_int8_t byte2; u_int8_t addr[8]; u_int8_t length[4]; u_int8_t reserved; u_int8_t control; }; struct scsi_write_same_10 { uint8_t opcode; uint8_t byte2; #define SWS_LBDATA 0x02 #define SWS_PBDATA 0x04 #define SWS_UNMAP 0x08 #define SWS_ANCHOR 0x10 uint8_t addr[4]; uint8_t group; uint8_t length[2]; uint8_t control; }; struct scsi_write_same_16 { uint8_t opcode; uint8_t byte2; #define SWS_NDOB 0x01 uint8_t addr[8]; uint8_t length[4]; uint8_t group; uint8_t control; }; struct scsi_unmap { uint8_t opcode; uint8_t byte2; #define SU_ANCHOR 0x01 uint8_t reserved[4]; uint8_t group; uint8_t length[2]; uint8_t control; }; struct scsi_unmap_header { uint8_t length[2]; uint8_t desc_length[2]; uint8_t reserved[4]; }; struct scsi_unmap_desc { uint8_t lba[8]; uint8_t length[4]; uint8_t reserved[4]; }; struct scsi_write_verify_10 { uint8_t opcode; 
uint8_t byte2; #define SWV_BYTCHK 0x02 #define SWV_DPO 0x10 #define SWV_WRPROECT_MASK 0xe0 uint8_t addr[4]; uint8_t group; uint8_t length[2]; uint8_t control; }; struct scsi_write_verify_12 { uint8_t opcode; uint8_t byte2; uint8_t addr[4]; uint8_t length[4]; uint8_t group; uint8_t control; }; struct scsi_write_verify_16 { uint8_t opcode; uint8_t byte2; uint8_t addr[8]; uint8_t length[4]; uint8_t group; uint8_t control; }; struct scsi_start_stop_unit { u_int8_t opcode; u_int8_t byte2; #define SSS_IMMED 0x01 u_int8_t reserved[2]; u_int8_t how; #define SSS_START 0x01 #define SSS_LOEJ 0x02 #define SSS_PC_MASK 0xf0 #define SSS_PC_START_VALID 0x00 #define SSS_PC_ACTIVE 0x10 #define SSS_PC_IDLE 0x20 #define SSS_PC_STANDBY 0x30 #define SSS_PC_LU_CONTROL 0x70 #define SSS_PC_FORCE_IDLE_0 0xa0 #define SSS_PC_FORCE_STANDBY_0 0xb0 u_int8_t control; }; struct ata_pass_12 { u_int8_t opcode; u_int8_t protocol; #define AP_PROTO_HARD_RESET (0x00 << 1) #define AP_PROTO_SRST (0x01 << 1) #define AP_PROTO_NON_DATA (0x03 << 1) #define AP_PROTO_PIO_IN (0x04 << 1) #define AP_PROTO_PIO_OUT (0x05 << 1) #define AP_PROTO_DMA (0x06 << 1) #define AP_PROTO_DMA_QUEUED (0x07 << 1) #define AP_PROTO_DEVICE_DIAG (0x08 << 1) #define AP_PROTO_DEVICE_RESET (0x09 << 1) #define AP_PROTO_UDMA_IN (0x0a << 1) #define AP_PROTO_UDMA_OUT (0x0b << 1) #define AP_PROTO_FPDMA (0x0c << 1) #define AP_PROTO_RESP_INFO (0x0f << 1) #define AP_MULTI 0xe0 u_int8_t flags; #define AP_T_LEN 0x03 #define AP_BB 0x04 #define AP_T_DIR 0x08 #define AP_CK_COND 0x20 #define AP_OFFLINE 0x60 u_int8_t features; u_int8_t sector_count; u_int8_t lba_low; u_int8_t lba_mid; u_int8_t lba_high; u_int8_t device; u_int8_t command; u_int8_t reserved; u_int8_t control; }; struct scsi_maintenance_in { uint8_t opcode; uint8_t byte2; #define SERVICE_ACTION_MASK 0x1f #define SA_RPRT_TRGT_GRP 0x0a uint8_t reserved[4]; uint8_t length[4]; uint8_t reserved1; uint8_t control; }; struct scsi_report_supported_opcodes { uint8_t opcode; uint8_t service_action; uint8_t options; #define RSO_RCTD 0x80 #define RSO_OPTIONS_MASK 0x07 #define RSO_OPTIONS_ALL 0x00 #define RSO_OPTIONS_OC 0x01 #define RSO_OPTIONS_OC_SA 0x02 uint8_t requested_opcode; uint8_t requested_service_action[2]; uint8_t length[4]; uint8_t reserved1; uint8_t control; }; struct scsi_report_supported_opcodes_timeout { uint8_t length[2]; uint8_t reserved; uint8_t cmd_specific; uint8_t nominal_time[4]; uint8_t recommended_time[4]; }; struct scsi_report_supported_opcodes_descr { uint8_t opcode; uint8_t reserved; uint8_t service_action[2]; uint8_t reserved2; uint8_t flags; #define RSO_SERVACTV 0x01 #define RSO_CTDP 0x02 uint8_t cdb_length[2]; struct scsi_report_supported_opcodes_timeout timeout[0]; }; struct scsi_report_supported_opcodes_all { uint8_t length[4]; struct scsi_report_supported_opcodes_descr descr[0]; }; struct scsi_report_supported_opcodes_one { uint8_t reserved; uint8_t support; #define RSO_ONE_CTDP 0x80 #define RSO_ONE_SUP_MASK 0x07 #define RSO_ONE_SUP_UNAVAIL 0x00 #define RSO_ONE_SUP_NOT_SUP 0x01 #define RSO_ONE_SUP_AVAIL 0x03 #define RSO_ONE_SUP_VENDOR 0x05 uint8_t cdb_length[2]; uint8_t cdb_usage[]; }; struct scsi_report_supported_tmf { uint8_t opcode; uint8_t service_action; uint8_t reserved[4]; uint8_t length[4]; uint8_t reserved1; uint8_t control; }; struct scsi_report_supported_tmf_data { uint8_t byte1; #define RST_WAKES 0x01 #define RST_TRS 0x02 #define RST_QTS 0x04 #define RST_LURS 0x08 #define RST_CTSS 0x10 #define RST_CACAS 0x20 #define RST_ATSS 0x40 #define RST_ATS 0x80 uint8_t byte2; #define 
RST_ITNRS 0x01 #define RST_QTSS 0x02 #define RST_QAES 0x04 uint8_t reserved[2]; }; struct scsi_report_timestamp { uint8_t opcode; uint8_t service_action; uint8_t reserved[4]; uint8_t length[4]; uint8_t reserved1; uint8_t control; }; struct scsi_report_timestamp_data { uint8_t length[2]; uint8_t origin; #define RTS_ORIG_MASK 0x00 #define RTS_ORIG_ZERO 0x00 #define RTS_ORIG_SET 0x02 #define RTS_ORIG_OUTSIDE 0x03 uint8_t reserved; uint8_t timestamp[6]; uint8_t reserve2[2]; }; struct scsi_receive_copy_status_lid1 { uint8_t opcode; uint8_t service_action; #define RCS_RCS_LID1 0x00 uint8_t list_identifier; uint8_t reserved[7]; uint8_t length[4]; uint8_t reserved1; uint8_t control; }; struct scsi_receive_copy_status_lid1_data { uint8_t available_data[4]; uint8_t copy_command_status; #define RCS_CCS_INPROG 0x00 #define RCS_CCS_COMPLETED 0x01 #define RCS_CCS_ERROR 0x02 uint8_t segments_processed[2]; uint8_t transfer_count_units; #define RCS_TC_BYTES 0x00 #define RCS_TC_KBYTES 0x01 #define RCS_TC_MBYTES 0x02 #define RCS_TC_GBYTES 0x03 #define RCS_TC_TBYTES 0x04 #define RCS_TC_PBYTES 0x05 #define RCS_TC_EBYTES 0x06 #define RCS_TC_LBAS 0xf1 uint8_t transfer_count[4]; }; struct scsi_receive_copy_failure_details { uint8_t opcode; uint8_t service_action; #define RCS_RCFD 0x04 uint8_t list_identifier; uint8_t reserved[7]; uint8_t length[4]; uint8_t reserved1; uint8_t control; }; struct scsi_receive_copy_failure_details_data { uint8_t available_data[4]; uint8_t reserved[52]; uint8_t copy_command_status; uint8_t reserved2; uint8_t sense_data_length[2]; uint8_t sense_data[]; }; struct scsi_receive_copy_status_lid4 { uint8_t opcode; uint8_t service_action; #define RCS_RCS_LID4 0x05 uint8_t list_identifier[4]; uint8_t reserved[4]; uint8_t length[4]; uint8_t reserved1; uint8_t control; }; struct scsi_receive_copy_status_lid4_data { uint8_t available_data[4]; uint8_t response_to_service_action; uint8_t copy_command_status; #define RCS_CCS_COMPLETED_PROD 0x03 #define RCS_CCS_COMPLETED_RESID 0x04 #define RCS_CCS_INPROG_FGBG 0x10 #define RCS_CCS_INPROG_FG 0x11 #define RCS_CCS_INPROG_BG 0x12 #define RCS_CCS_ABORTED 0x60 uint8_t operation_counter[2]; uint8_t estimated_status_update_delay[4]; uint8_t extended_copy_completion_status; uint8_t length_of_the_sense_data_field; uint8_t sense_data_length; uint8_t transfer_count_units; uint8_t transfer_count[8]; uint8_t segments_processed[2]; uint8_t reserved[6]; uint8_t sense_data[]; }; struct scsi_receive_copy_operating_parameters { uint8_t opcode; uint8_t service_action; #define RCS_RCOP 0x03 uint8_t reserved[8]; uint8_t length[4]; uint8_t reserved1; uint8_t control; }; struct scsi_receive_copy_operating_parameters_data { uint8_t length[4]; uint8_t snlid; #define RCOP_SNLID 0x01 uint8_t reserved[3]; uint8_t maximum_cscd_descriptor_count[2]; uint8_t maximum_segment_descriptor_count[2]; uint8_t maximum_descriptor_list_length[4]; uint8_t maximum_segment_length[4]; uint8_t maximum_inline_data_length[4]; uint8_t held_data_limit[4]; uint8_t maximum_stream_device_transfer_size[4]; uint8_t reserved2[2]; uint8_t total_concurrent_copies[2]; uint8_t maximum_concurrent_copies; uint8_t data_segment_granularity; uint8_t inline_data_granularity; uint8_t held_data_granularity; uint8_t reserved3[3]; uint8_t implemented_descriptor_list_length; uint8_t list_of_implemented_descriptor_type_codes[0]; }; struct scsi_extended_copy { uint8_t opcode; uint8_t service_action; #define EC_EC_LID1 0x00 #define EC_EC_LID4 0x01 uint8_t reserved[8]; uint8_t length[4]; uint8_t reserved1; uint8_t control; }; 
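/*
 * A minimal usage sketch (hypothetical helper, not part of the header
 * proper): the multi-byte fields in these structures are big-endian
 * byte arrays, so the limits reported by RECEIVE COPY OPERATING
 * PARAMETERS are decoded with the scsi_2btoul()/scsi_4btoul() helpers
 * this header provides.
 */
static inline void
example_rcop_limits(struct scsi_receive_copy_operating_parameters_data *data,
    uint32_t *max_cscd, uint32_t *max_seg, uint32_t *max_list_len)
{
	/* Decode 2- and 4-byte big-endian fields into host integers. */
	*max_cscd = scsi_2btoul(data->maximum_cscd_descriptor_count);
	*max_seg = scsi_2btoul(data->maximum_segment_descriptor_count);
	*max_list_len = scsi_4btoul(data->maximum_descriptor_list_length);
}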
struct scsi_ec_cscd_dtsp { uint8_t flags; #define EC_CSCD_FIXED 0x01 #define EC_CSCD_PAD 0x04 uint8_t block_length[3]; }; struct scsi_ec_cscd { uint8_t type_code; #define EC_CSCD_EXT 0xff uint8_t luidt_pdt; #define EC_NUL 0x20 #define EC_LUIDT_MASK 0xc0 #define EC_LUIDT_LUN 0x00 #define EC_LUIDT_PROXY_TOKEN 0x40 uint8_t relative_initiator_port[2]; uint8_t cscd_params[24]; struct scsi_ec_cscd_dtsp dtsp; }; struct scsi_ec_cscd_id { uint8_t type_code; #define EC_CSCD_ID 0xe4 uint8_t luidt_pdt; uint8_t relative_initiator_port[2]; uint8_t codeset; uint8_t id_type; uint8_t reserved; uint8_t length; uint8_t designator[20]; struct scsi_ec_cscd_dtsp dtsp; }; struct scsi_ec_segment { uint8_t type_code; uint8_t flags; #define EC_SEG_DC 0x02 #define EC_SEG_CAT 0x01 uint8_t descr_length[2]; uint8_t params[]; }; struct scsi_ec_segment_b2b { uint8_t type_code; #define EC_SEG_B2B 0x02 uint8_t flags; uint8_t descr_length[2]; uint8_t src_cscd[2]; uint8_t dst_cscd[2]; uint8_t reserved[2]; uint8_t number_of_blocks[2]; uint8_t src_lba[8]; uint8_t dst_lba[8]; }; struct scsi_ec_segment_verify { uint8_t type_code; #define EC_SEG_VERIFY 0x07 uint8_t reserved; uint8_t descr_length[2]; uint8_t src_cscd[2]; uint8_t reserved2[2]; uint8_t tur; uint8_t reserved3[3]; }; struct scsi_ec_segment_register_key { uint8_t type_code; #define EC_SEG_REGISTER_KEY 0x14 uint8_t reserved; uint8_t descr_length[2]; uint8_t reserved2[2]; uint8_t dst_cscd[2]; uint8_t res_key[8]; uint8_t sa_res_key[8]; uint8_t reserved3[4]; }; struct scsi_extended_copy_lid1_data { uint8_t list_identifier; uint8_t flags; #define EC_PRIORITY 0x07 #define EC_LIST_ID_USAGE_MASK 0x18 #define EC_LIST_ID_USAGE_FULL 0x08 #define EC_LIST_ID_USAGE_NOHOLD 0x10 #define EC_LIST_ID_USAGE_NONE 0x18 #define EC_STR 0x20 uint8_t cscd_list_length[2]; uint8_t reserved[4]; uint8_t segment_list_length[4]; uint8_t inline_data_length[4]; uint8_t data[]; }; struct scsi_extended_copy_lid4_data { uint8_t list_format; #define EC_LIST_FORMAT 0x01 uint8_t flags; uint8_t header_cscd_list_length[2]; uint8_t reserved[11]; uint8_t flags2; #define EC_IMMED 0x01 #define EC_G_SENSE 0x02 uint8_t header_cscd_type_code; uint8_t reserved2[3]; uint8_t list_identifier[4]; uint8_t reserved3[18]; uint8_t cscd_list_length[2]; uint8_t segment_list_length[2]; uint8_t inline_data_length[2]; uint8_t data[]; }; struct scsi_copy_operation_abort { uint8_t opcode; uint8_t service_action; #define EC_COA 0x1c uint8_t list_identifier[4]; uint8_t reserved[9]; uint8_t control; }; struct scsi_populate_token { uint8_t opcode; uint8_t service_action; #define EC_PT 0x10 uint8_t reserved[4]; uint8_t list_identifier[4]; uint8_t length[4]; uint8_t group_number; uint8_t control; }; struct scsi_range_desc { uint8_t lba[8]; uint8_t length[4]; uint8_t reserved[4]; }; struct scsi_populate_token_data { uint8_t length[2]; uint8_t flags; #define EC_PT_IMMED 0x01 #define EC_PT_RTV 0x02 uint8_t reserved; uint8_t inactivity_timeout[4]; uint8_t rod_type[4]; uint8_t reserved2[2]; uint8_t range_descriptor_length[2]; struct scsi_range_desc desc[]; }; struct scsi_write_using_token { uint8_t opcode; uint8_t service_action; #define EC_WUT 0x11 uint8_t reserved[4]; uint8_t list_identifier[4]; uint8_t length[4]; uint8_t group_number; uint8_t control; }; struct scsi_write_using_token_data { uint8_t length[2]; uint8_t flags; #define EC_WUT_IMMED 0x01 #define EC_WUT_DEL_TKN 0x02 uint8_t reserved[5]; uint8_t offset_into_rod[8]; uint8_t rod_token[512]; uint8_t reserved2[6]; uint8_t range_descriptor_length[2]; struct scsi_range_desc desc[]; }; 
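/*
 * A minimal usage sketch (hypothetical helper, not part of the header
 * proper): filling one block-range descriptor for POPULATE TOKEN or
 * WRITE USING TOKEN, assuming the scsi_u64to8b()/scsi_ulto4b() helpers
 * this header provides and a userland memset() (bzero() in the kernel).
 */
static inline void
example_fill_range_desc(struct scsi_range_desc *desc, uint64_t lba,
    uint32_t nblocks)
{
	scsi_u64to8b(lba, desc->lba);		/* 8-byte big-endian LBA */
	scsi_ulto4b(nblocks, desc->length);	/* 4-byte big-endian count */
	memset(desc->reserved, 0, sizeof(desc->reserved));
}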
struct scsi_receive_rod_token_information { uint8_t opcode; uint8_t service_action; #define RCS_RRTI 0x07 uint8_t list_identifier[4]; uint8_t reserved[4]; uint8_t length[4]; uint8_t reserved2; uint8_t control; }; struct scsi_token { uint8_t type[4]; #define ROD_TYPE_INTERNAL 0x00000000 #define ROD_TYPE_AUR 0x00010000 #define ROD_TYPE_PIT_DEF 0x00800000 #define ROD_TYPE_PIT_VULN 0x00800001 #define ROD_TYPE_PIT_PERS 0x00800002 #define ROD_TYPE_PIT_ANY 0x0080FFFF #define ROD_TYPE_BLOCK_ZERO 0xFFFF0001 uint8_t reserved[2]; uint8_t length[2]; uint8_t body[0]; }; struct scsi_report_all_rod_tokens { uint8_t opcode; uint8_t service_action; #define RCS_RART 0x08 uint8_t reserved[8]; uint8_t length[4]; uint8_t reserved2; uint8_t control; }; struct scsi_report_all_rod_tokens_data { uint8_t available_data[4]; uint8_t reserved[4]; uint8_t rod_management_token_list[]; }; struct ata_pass_16 { u_int8_t opcode; u_int8_t protocol; #define AP_EXTEND 0x01 u_int8_t flags; #define AP_FLAG_TLEN_NO_DATA (0 << 0) #define AP_FLAG_TLEN_FEAT (1 << 0) #define AP_FLAG_TLEN_SECT_CNT (2 << 0) #define AP_FLAG_TLEN_STPSIU (3 << 0) #define AP_FLAG_BYT_BLOK_BYTES (0 << 2) #define AP_FLAG_BYT_BLOK_BLOCKS (1 << 2) #define AP_FLAG_TDIR_TO_DEV (0 << 3) #define AP_FLAG_TDIR_FROM_DEV (1 << 3) #define AP_FLAG_CHK_COND (1 << 5) u_int8_t features_ext; u_int8_t features; u_int8_t sector_count_ext; u_int8_t sector_count; u_int8_t lba_low_ext; u_int8_t lba_low; u_int8_t lba_mid_ext; u_int8_t lba_mid; u_int8_t lba_high_ext; u_int8_t lba_high; u_int8_t device; u_int8_t command; u_int8_t control; }; #define SC_SCSI_1 0x01 #define SC_SCSI_2 0x03 /* * Opcodes */ #define TEST_UNIT_READY 0x00 #define REQUEST_SENSE 0x03 #define READ_6 0x08 #define WRITE_6 0x0A #define INQUIRY 0x12 #define MODE_SELECT_6 0x15 #define MODE_SENSE_6 0x1A #define START_STOP_UNIT 0x1B #define START_STOP 0x1B #define RESERVE 0x16 #define RELEASE 0x17 #define RECEIVE_DIAGNOSTIC 0x1C #define SEND_DIAGNOSTIC 0x1D #define PREVENT_ALLOW 0x1E #define READ_CAPACITY 0x25 #define READ_10 0x28 #define WRITE_10 0x2A #define POSITION_TO_ELEMENT 0x2B #define WRITE_VERIFY_10 0x2E #define VERIFY_10 0x2F #define SYNCHRONIZE_CACHE 0x35 #define READ_DEFECT_DATA_10 0x37 #define WRITE_BUFFER 0x3B #define READ_BUFFER 0x3C #define CHANGE_DEFINITION 0x40 #define WRITE_SAME_10 0x41 #define UNMAP 0x42 #define LOG_SELECT 0x4C #define LOG_SENSE 0x4D #define MODE_SELECT_10 0x55 #define RESERVE_10 0x56 #define RELEASE_10 0x57 #define MODE_SENSE_10 0x5A #define PERSISTENT_RES_IN 0x5E #define PERSISTENT_RES_OUT 0x5F #define EXTENDED_COPY 0x83 #define RECEIVE_COPY_STATUS 0x84 #define ATA_PASS_16 0x85 #define READ_16 0x88 #define COMPARE_AND_WRITE 0x89 #define WRITE_16 0x8A #define READ_ATTRIBUTE 0x8C #define WRITE_ATTRIBUTE 0x8D #define WRITE_VERIFY_16 0x8E #define VERIFY_16 0x8F #define SYNCHRONIZE_CACHE_16 0x91 #define WRITE_SAME_16 0x93 #define WRITE_ATOMIC_16 0x9C #define SERVICE_ACTION_IN 0x9E #define REPORT_LUNS 0xA0 #define ATA_PASS_12 0xA1 #define SECURITY_PROTOCOL_IN 0xA2 #define MAINTENANCE_IN 0xA3 #define MAINTENANCE_OUT 0xA4 #define MOVE_MEDIUM 0xA5 #define READ_12 0xA8 #define WRITE_12 0xAA #define WRITE_VERIFY_12 0xAE #define VERIFY_12 0xAF #define SECURITY_PROTOCOL_OUT 0xB5 #define READ_ELEMENT_STATUS 0xB8 #define READ_CD 0xBE /* Maintenance In Service Action Codes */ #define REPORT_IDENTIFYING_INFRMATION 0x05 #define REPORT_TARGET_PORT_GROUPS 0x0A #define REPORT_ALIASES 0x0B #define REPORT_SUPPORTED_OPERATION_CODES 0x0C #define REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS 0x0D #define 
REPORT_PRIORITY 0x0E #define REPORT_TIMESTAMP 0x0F #define MANAGEMENT_PROTOCOL_IN 0x10 /* Maintenance Out Service Action Codes */ #define SET_IDENTIFY_INFORMATION 0x06 #define SET_TARGET_PORT_GROUPS 0x0A #define CHANGE_ALIASES 0x0B #define SET_PRIORITY 0x0E #define SET_TIMESTAMP 0x0F #define MANGAEMENT_PROTOCOL_OUT 0x10 /* * Device Types */ #define T_DIRECT 0x00 #define T_SEQUENTIAL 0x01 #define T_PRINTER 0x02 #define T_PROCESSOR 0x03 #define T_WORM 0x04 #define T_CDROM 0x05 #define T_SCANNER 0x06 #define T_OPTICAL 0x07 #define T_CHANGER 0x08 #define T_COMM 0x09 #define T_ASC0 0x0a #define T_ASC1 0x0b #define T_STORARRAY 0x0c #define T_ENCLOSURE 0x0d #define T_RBC 0x0e #define T_OCRW 0x0f #define T_OSD 0x11 #define T_ADC 0x12 #define T_NODEVICE 0x1f #define T_ANY 0xff /* Used in Quirk table matches */ #define T_REMOV 1 #define T_FIXED 0 /* * This length is the initial inquiry length used by the probe code, as * well as the length necessary for scsi_print_inquiry() to function * correctly. If either use requires a different length in the future, * the two values should be de-coupled. */ #define SHORT_INQUIRY_LENGTH 36 struct scsi_inquiry_data { u_int8_t device; #define SID_TYPE(inq_data) ((inq_data)->device & 0x1f) #define SID_QUAL(inq_data) (((inq_data)->device & 0xE0) >> 5) #define SID_QUAL_LU_CONNECTED 0x00 /* * The specified peripheral device * type is currently connected to * logical unit. If the target cannot * determine whether or not a physical * device is currently connected, it * shall also use this peripheral * qualifier when returning the INQUIRY * data. This peripheral qualifier * does not mean that the device is * ready for access by the initiator. */ #define SID_QUAL_LU_OFFLINE 0x01 /* * The target is capable of supporting * the specified peripheral device type * on this logical unit; however, the * physical device is not currently * connected to this logical unit. */ #define SID_QUAL_RSVD 0x02 #define SID_QUAL_BAD_LU 0x03 /* * The target is not capable of * supporting a physical device on * this logical unit. For this * peripheral qualifier the peripheral * device type shall be set to 1Fh to * provide compatibility with previous * versions of SCSI. All other * peripheral device type values are * reserved for this peripheral * qualifier. 
*/ #define SID_QUAL_IS_VENDOR_UNIQUE(inq_data) ((SID_QUAL(inq_data) & 0x04) != 0) u_int8_t dev_qual2; #define SID_QUAL2 0x7F #define SID_LU_CONG 0x40 #define SID_RMB 0x80 #define SID_IS_REMOVABLE(inq_data) (((inq_data)->dev_qual2 & SID_RMB) != 0) u_int8_t version; #define SID_ANSI_REV(inq_data) ((inq_data)->version & 0x07) #define SCSI_REV_0 0 #define SCSI_REV_CCS 1 #define SCSI_REV_2 2 #define SCSI_REV_SPC 3 #define SCSI_REV_SPC2 4 #define SCSI_REV_SPC3 5 #define SCSI_REV_SPC4 6 #define SID_ECMA 0x38 #define SID_ISO 0xC0 u_int8_t response_format; #define SID_AENC 0x80 #define SID_TrmIOP 0x40 #define SID_NormACA 0x20 #define SID_HiSup 0x10 u_int8_t additional_length; #define SID_ADDITIONAL_LENGTH(iqd) \ ((iqd)->additional_length + \ __offsetof(struct scsi_inquiry_data, additional_length) + 1) u_int8_t spc3_flags; #define SPC3_SID_PROTECT 0x01 #define SPC3_SID_3PC 0x08 #define SPC3_SID_TPGS_MASK 0x30 #define SPC3_SID_TPGS_IMPLICIT 0x10 #define SPC3_SID_TPGS_EXPLICIT 0x20 #define SPC3_SID_ACC 0x40 #define SPC3_SID_SCCS 0x80 u_int8_t spc2_flags; #define SPC2_SID_ADDR16 0x01 #define SPC2_SID_MChngr 0x08 #define SPC2_SID_MultiP 0x10 #define SPC2_SID_EncServ 0x40 #define SPC2_SID_BQueue 0x80 #define INQ_DATA_TQ_ENABLED(iqd) \ ((SID_ANSI_REV(iqd) < SCSI_REV_SPC2)? ((iqd)->flags & SID_CmdQue) : \ (((iqd)->flags & SID_CmdQue) && !((iqd)->spc2_flags & SPC2_SID_BQueue)) || \ (!((iqd)->flags & SID_CmdQue) && ((iqd)->spc2_flags & SPC2_SID_BQueue))) u_int8_t flags; #define SID_SftRe 0x01 #define SID_CmdQue 0x02 #define SID_Linked 0x08 #define SID_Sync 0x10 #define SID_WBus16 0x20 #define SID_WBus32 0x40 #define SID_RelAdr 0x80 #define SID_VENDOR_SIZE 8 char vendor[SID_VENDOR_SIZE]; #define SID_PRODUCT_SIZE 16 char product[SID_PRODUCT_SIZE]; #define SID_REVISION_SIZE 4 char revision[SID_REVISION_SIZE]; /* * The following fields were taken from SCSI Primary Commands - 2 * (SPC-2) Revision 14, Dated 11 November 1999 */ #define SID_VENDOR_SPECIFIC_0_SIZE 20 u_int8_t vendor_specific0[SID_VENDOR_SPECIFIC_0_SIZE]; /* * An extension of SCSI Parallel Specific Values */ #define SID_SPI_IUS 0x01 #define SID_SPI_QAS 0x02 #define SID_SPI_CLOCK_ST 0x00 #define SID_SPI_CLOCK_DT 0x04 #define SID_SPI_CLOCK_DT_ST 0x0C #define SID_SPI_MASK 0x0F u_int8_t spi3data; u_int8_t reserved2; /* * Version Descriptors, stored 2 byte values. */ u_int8_t version1[2]; u_int8_t version2[2]; u_int8_t version3[2]; u_int8_t version4[2]; u_int8_t version5[2]; u_int8_t version6[2]; u_int8_t version7[2]; u_int8_t version8[2]; u_int8_t reserved3[22]; #define SID_VENDOR_SPECIFIC_1_SIZE 160 u_int8_t vendor_specific1[SID_VENDOR_SPECIFIC_1_SIZE]; }; /* * This structure is more suited to initiator operation, because the * maximum number of supported pages is already allocated. */ struct scsi_vpd_supported_page_list { u_int8_t device; u_int8_t page_code; #define SVPD_SUPPORTED_PAGE_LIST 0x00 #define SVPD_SUPPORTED_PAGES_HDR_LEN 4 u_int8_t reserved; u_int8_t length; /* number of VPD entries */ #define SVPD_SUPPORTED_PAGES_SIZE 251 u_int8_t list[SVPD_SUPPORTED_PAGES_SIZE]; }; /* * This structure is more suited to target operation, because the * number of supported pages is left to the user to allocate. 
*/ struct scsi_vpd_supported_pages { u_int8_t device; u_int8_t page_code; u_int8_t reserved; #define SVPD_SUPPORTED_PAGES 0x00 u_int8_t length; u_int8_t page_list[0]; }; struct scsi_vpd_unit_serial_number { u_int8_t device; u_int8_t page_code; #define SVPD_UNIT_SERIAL_NUMBER 0x80 u_int8_t reserved; u_int8_t length; /* serial number length */ #define SVPD_SERIAL_NUM_SIZE 251 u_int8_t serial_num[SVPD_SERIAL_NUM_SIZE]; }; struct scsi_vpd_device_id { u_int8_t device; u_int8_t page_code; #define SVPD_DEVICE_ID 0x83 #define SVPD_DEVICE_ID_MAX_SIZE 252 #define SVPD_DEVICE_ID_HDR_LEN \ __offsetof(struct scsi_vpd_device_id, desc_list) u_int8_t length[2]; u_int8_t desc_list[]; }; struct scsi_vpd_id_descriptor { u_int8_t proto_codeset; /* * See the SCSI_PROTO definitions above for the protocols. */ #define SVPD_ID_PROTO_SHIFT 4 #define SVPD_ID_CODESET_BINARY 0x01 #define SVPD_ID_CODESET_ASCII 0x02 #define SVPD_ID_CODESET_UTF8 0x03 #define SVPD_ID_CODESET_MASK 0x0f u_int8_t id_type; #define SVPD_ID_PIV 0x80 #define SVPD_ID_ASSOC_LUN 0x00 #define SVPD_ID_ASSOC_PORT 0x10 #define SVPD_ID_ASSOC_TARGET 0x20 #define SVPD_ID_ASSOC_MASK 0x30 #define SVPD_ID_TYPE_VENDOR 0x00 #define SVPD_ID_TYPE_T10 0x01 #define SVPD_ID_TYPE_EUI64 0x02 #define SVPD_ID_TYPE_NAA 0x03 #define SVPD_ID_TYPE_RELTARG 0x04 #define SVPD_ID_TYPE_TPORTGRP 0x05 #define SVPD_ID_TYPE_LUNGRP 0x06 #define SVPD_ID_TYPE_MD5_LUN_ID 0x07 #define SVPD_ID_TYPE_SCSI_NAME 0x08 #define SVPD_ID_TYPE_MASK 0x0f u_int8_t reserved; u_int8_t length; #define SVPD_DEVICE_ID_DESC_HDR_LEN \ __offsetof(struct scsi_vpd_id_descriptor, identifier) u_int8_t identifier[]; }; struct scsi_vpd_id_t10 { u_int8_t vendor[8]; u_int8_t vendor_spec_id[0]; }; struct scsi_vpd_id_eui64 { u_int8_t ieee_company_id[3]; u_int8_t extension_id[5]; }; struct scsi_vpd_id_naa_basic { uint8_t naa; /* big endian, packed: uint8_t naa : 4; uint8_t naa_desig : 4; */ #define SVPD_ID_NAA_NAA_SHIFT 4 #define SVPD_ID_NAA_IEEE_EXT 0x02 #define SVPD_ID_NAA_LOCAL_REG 0x03 #define SVPD_ID_NAA_IEEE_REG 0x05 #define SVPD_ID_NAA_IEEE_REG_EXT 0x06 uint8_t naa_data[]; }; struct scsi_vpd_id_naa_ieee_extended_id { uint8_t naa; uint8_t vendor_specific_id_a; uint8_t ieee_company_id[3]; uint8_t vendor_specific_id_b[4]; }; struct scsi_vpd_id_naa_local_reg { uint8_t naa; uint8_t local_value[7]; }; struct scsi_vpd_id_naa_ieee_reg { uint8_t naa; uint8_t reg_value[7]; /* big endian, packed: uint8_t naa_basic : 4; uint8_t ieee_company_id_0 : 4; uint8_t ieee_company_id_1[2]; uint8_t ieee_company_id_2 : 4; uint8_t vendor_specific_id_0 : 4; uint8_t vendor_specific_id_1[4]; */ }; struct scsi_vpd_id_naa_ieee_reg_extended { uint8_t naa; uint8_t reg_value[15]; /* big endian, packed: uint8_t naa_basic : 4; uint8_t ieee_company_id_0 : 4; uint8_t ieee_company_id_1[2]; uint8_t ieee_company_id_2 : 4; uint8_t vendor_specific_id_0 : 4; uint8_t vendor_specific_id_1[4]; uint8_t vendor_specific_id_ext[8]; */ }; struct scsi_vpd_id_rel_trgt_port_id { uint8_t obsolete[2]; uint8_t rel_trgt_port_id[2]; }; struct scsi_vpd_id_trgt_port_grp_id { uint8_t reserved[2]; uint8_t trgt_port_grp[2]; }; struct scsi_vpd_id_lun_grp_id { uint8_t reserved[2]; uint8_t log_unit_grp[2]; }; struct scsi_vpd_id_md5_lun_id { uint8_t lun_id[16]; }; struct scsi_vpd_id_scsi_name { uint8_t name_string[256]; }; struct scsi_service_action_in { uint8_t opcode; uint8_t service_action; uint8_t action_dependent[13]; uint8_t control; }; struct scsi_vpd_extended_inquiry_data { uint8_t device; uint8_t page_code; #define SVPD_EXTENDED_INQUIRY_DATA 0x86 uint8_t 
page_length[2]; uint8_t flags1; /* These values are for direct access devices */ #define SVPD_EID_AM_MASK 0xC0 #define SVPD_EID_AM_DEFER 0x80 #define SVPD_EID_AM_IMMED 0x40 #define SVPD_EID_AM_UNDEFINED 0x00 #define SVPD_EID_AM_RESERVED 0xc0 #define SVPD_EID_SPT 0x38 #define SVPD_EID_SPT_1 0x00 #define SVPD_EID_SPT_12 0x08 #define SVPD_EID_SPT_2 0x10 #define SVPD_EID_SPT_13 0x18 #define SVPD_EID_SPT_3 0x20 #define SVPD_EID_SPT_23 0x28 #define SVPD_EID_SPT_123 0x38 /* These values are for sequential access devices */ #define SVPD_EID_SA_SPT_LBP 0x08 #define SVPD_EID_GRD_CHK 0x04 #define SVPD_EID_APP_CHK 0x02 #define SVPD_EID_REF_CHK 0x01 uint8_t flags2; #define SVPD_EID_UASK_SUP 0x20 #define SVPD_EID_GROUP_SUP 0x10 #define SVPD_EID_PRIOR_SUP 0x08 #define SVPD_EID_HEADSUP 0x04 #define SVPD_EID_ORDSUP 0x02 #define SVPD_EID_SIMPSUP 0x01 uint8_t flags3; #define SVPD_EID_WU_SUP 0x08 #define SVPD_EID_CRD_SUP 0x04 #define SVPD_EID_NV_SUP 0x02 #define SVPD_EID_V_SUP 0x01 uint8_t flags4; #define SVPD_EID_P_I_I_SUP 0x10 #define SVPD_EID_LUICLT 0x01 uint8_t flags5; #define SVPD_EID_R_SUP 0x10 #define SVPD_EID_CBCS 0x01 uint8_t flags6; #define SVPD_EID_MULTI_I_T_FW 0x0F #define SVPD_EID_MC_VENDOR_SPEC 0x00 #define SVPD_EID_MC_MODE_1 0x01 #define SVPD_EID_MC_MODE_2 0x02 #define SVPD_EID_MC_MODE_3 0x03 uint8_t est[2]; uint8_t flags7; #define SVPD_EID_POA_SUP 0x80 #define SVPD_EID_HRA_SUP 0x40 #define SVPD_EID_VSA_SUP 0x20 uint8_t max_sense_length; uint8_t reserved2[50]; }; struct scsi_vpd_mode_page_policy_descr { uint8_t page_code; uint8_t subpage_code; uint8_t policy; #define SVPD_MPP_SHARED 0x00 #define SVPD_MPP_PORT 0x01 #define SVPD_MPP_I_T 0x03 #define SVPD_MPP_MLUS 0x80 uint8_t reserved; }; struct scsi_vpd_mode_page_policy { uint8_t device; uint8_t page_code; #define SVPD_MODE_PAGE_POLICY 0x87 uint8_t page_length[2]; struct scsi_vpd_mode_page_policy_descr descr[0]; }; struct scsi_diag_page { uint8_t page_code; uint8_t page_specific_flags; uint8_t length[2]; uint8_t params[0]; }; struct scsi_vpd_port_designation { uint8_t reserved[2]; uint8_t relative_port_id[2]; uint8_t reserved2[2]; uint8_t initiator_transportid_length[2]; uint8_t initiator_transportid[0]; }; struct scsi_vpd_port_designation_cont { uint8_t reserved[2]; uint8_t target_port_descriptors_length[2]; struct scsi_vpd_id_descriptor target_port_descriptors[0]; }; struct scsi_vpd_scsi_ports { u_int8_t device; u_int8_t page_code; #define SVPD_SCSI_PORTS 0x88 u_int8_t page_length[2]; struct scsi_vpd_port_designation design[]; }; /* * ATA Information VPD Page based on * T10/2126-D Revision 04 */ #define SVPD_ATA_INFORMATION 0x89 struct scsi_vpd_tpc_descriptor { uint8_t desc_type[2]; uint8_t desc_length[2]; uint8_t parameters[]; }; struct scsi_vpd_tpc_descriptor_bdrl { uint8_t desc_type[2]; #define SVPD_TPC_BDRL 0x0000 uint8_t desc_length[2]; uint8_t vendor_specific[6]; uint8_t maximum_ranges[2]; uint8_t maximum_inactivity_timeout[4]; uint8_t default_inactivity_timeout[4]; uint8_t maximum_token_transfer_size[8]; uint8_t optimal_transfer_count[8]; }; struct scsi_vpd_tpc_descriptor_sc_descr { uint8_t opcode; uint8_t sa_length; uint8_t supported_service_actions[0]; }; struct scsi_vpd_tpc_descriptor_sc { uint8_t desc_type[2]; #define SVPD_TPC_SC 0x0001 uint8_t desc_length[2]; uint8_t list_length; struct scsi_vpd_tpc_descriptor_sc_descr descr[]; }; struct scsi_vpd_tpc_descriptor_pd { uint8_t desc_type[2]; #define SVPD_TPC_PD 0x0004 uint8_t desc_length[2]; uint8_t reserved[4]; uint8_t maximum_cscd_descriptor_count[2]; uint8_t
maximum_segment_descriptor_count[2]; uint8_t maximum_descriptor_list_length[4]; uint8_t maximum_inline_data_length[4]; uint8_t reserved2[12]; }; struct scsi_vpd_tpc_descriptor_sd { uint8_t desc_type[2]; #define SVPD_TPC_SD 0x0008 uint8_t desc_length[2]; uint8_t list_length; uint8_t supported_descriptor_codes[]; }; struct scsi_vpd_tpc_descriptor_sdid { uint8_t desc_type[2]; #define SVPD_TPC_SDID 0x000C uint8_t desc_length[2]; uint8_t list_length[2]; uint8_t supported_descriptor_ids[]; }; struct scsi_vpd_tpc_descriptor_rtf_block { uint8_t type_format; #define SVPD_TPC_RTF_BLOCK 0x00 uint8_t reserved; uint8_t desc_length[2]; uint8_t reserved2[2]; uint8_t optimal_length_granularity[2]; uint8_t maximum_bytes[8]; uint8_t optimal_bytes[8]; uint8_t optimal_bytes_to_token_per_segment[8]; uint8_t optimal_bytes_from_token_per_segment[8]; uint8_t reserved3[8]; }; struct scsi_vpd_tpc_descriptor_rtf { uint8_t desc_type[2]; #define SVPD_TPC_RTF 0x0106 uint8_t desc_length[2]; uint8_t remote_tokens; uint8_t reserved[11]; uint8_t minimum_token_lifetime[4]; uint8_t maximum_token_lifetime[4]; uint8_t maximum_token_inactivity_timeout[4]; uint8_t reserved2[18]; uint8_t type_specific_features_length[2]; uint8_t type_specific_features[0]; }; struct scsi_vpd_tpc_descriptor_srtd { uint8_t rod_type[4]; uint8_t flags; #define SVPD_TPC_SRTD_TOUT 0x01 #define SVPD_TPC_SRTD_TIN 0x02 #define SVPD_TPC_SRTD_ECPY 0x80 uint8_t reserved; uint8_t preference_indicator[2]; uint8_t reserved2[56]; }; struct scsi_vpd_tpc_descriptor_srt { uint8_t desc_type[2]; #define SVPD_TPC_SRT 0x0108 uint8_t desc_length[2]; uint8_t reserved[2]; uint8_t rod_type_descriptors_length[2]; uint8_t rod_type_descriptors[0]; }; struct scsi_vpd_tpc_descriptor_gco { uint8_t desc_type[2]; #define SVPD_TPC_GCO 0x8001 uint8_t desc_length[2]; uint8_t total_concurrent_copies[4]; uint8_t maximum_identified_concurrent_copies[4]; uint8_t maximum_segment_length[4]; uint8_t data_segment_granularity; uint8_t inline_data_granularity; uint8_t reserved[18]; }; struct scsi_vpd_tpc { uint8_t device; uint8_t page_code; #define SVPD_SCSI_TPC 0x8F uint8_t page_length[2]; struct scsi_vpd_tpc_descriptor descr[]; }; /* * Block Device Characteristics VPD Page based on * T10/1799-D Revision 31 */ struct scsi_vpd_block_characteristics { u_int8_t device; u_int8_t page_code; #define SVPD_BDC 0xB1 u_int8_t page_length[2]; u_int8_t medium_rotation_rate[2]; #define SVPD_BDC_RATE_NOT_REPORTED 0x00 #define SVPD_BDC_RATE_NON_ROTATING 0x01 u_int8_t reserved1; u_int8_t nominal_form_factor; #define SVPD_BDC_FORM_NOT_REPORTED 0x00 #define SVPD_BDC_FORM_5_25INCH 0x01 #define SVPD_BDC_FORM_3_5INCH 0x02 #define SVPD_BDC_FORM_2_5INCH 0x03 #define SVPD_BDC_FORM_1_5INCH 0x04 #define SVPD_BDC_FORM_LESSTHAN_1_5INCH 0x05 u_int8_t reserved2[56]; }; /* * Block Device Characteristics VPD Page */ struct scsi_vpd_block_device_characteristics { uint8_t device; uint8_t page_code; #define SVPD_BDC 0xB1 uint8_t page_length[2]; uint8_t medium_rotation_rate[2]; #define SVPD_NOT_REPORTED 0x0000 #define SVPD_NON_ROTATING 0x0001 uint8_t product_type; uint8_t wab_wac_ff; uint8_t flags; #define SVPD_VBULS 0x01 #define SVPD_FUAB 0x02 #define SVPD_HAW_ZBC 0x10 uint8_t reserved[55]; }; /* * Logical Block Provisioning VPD Page based on * T10/1799-D Revision 31 */ struct scsi_vpd_logical_block_prov { u_int8_t device; u_int8_t page_code; #define SVPD_LBP 0xB2 u_int8_t page_length[2]; #define SVPD_LBP_PL_BASIC 0x04 u_int8_t threshold_exponent; u_int8_t flags; #define SVPD_LBP_UNMAP 0x80 #define SVPD_LBP_WS16 0x40 #define 
SVPD_LBP_WS10 0x20 #define SVPD_LBP_RZ 0x04 #define SVPD_LBP_ANC_SUP 0x02 #define SVPD_LBP_DP 0x01 u_int8_t prov_type; #define SVPD_LBP_RESOURCE 0x01 #define SVPD_LBP_THIN 0x02 u_int8_t reserved; /* * Provisioning Group Descriptor can be here if SVPD_LBP_DP is set * Its size can be determined from page_length - 4 */ }; /* * Block Limits VDP Page based on SBC-4 Revision 2 */ struct scsi_vpd_block_limits { u_int8_t device; u_int8_t page_code; #define SVPD_BLOCK_LIMITS 0xB0 u_int8_t page_length[2]; #define SVPD_BL_PL_BASIC 0x10 #define SVPD_BL_PL_TP 0x3C u_int8_t reserved1; u_int8_t max_cmp_write_len; u_int8_t opt_txfer_len_grain[2]; u_int8_t max_txfer_len[4]; u_int8_t opt_txfer_len[4]; u_int8_t max_prefetch[4]; u_int8_t max_unmap_lba_cnt[4]; u_int8_t max_unmap_blk_cnt[4]; u_int8_t opt_unmap_grain[4]; u_int8_t unmap_grain_align[4]; u_int8_t max_write_same_length[8]; u_int8_t max_atomic_transfer_length[4]; u_int8_t atomic_alignment[4]; u_int8_t atomic_transfer_length_granularity[4]; u_int8_t reserved2[8]; }; struct scsi_read_capacity { u_int8_t opcode; u_int8_t byte2; #define SRC_RELADR 0x01 u_int8_t addr[4]; u_int8_t unused[2]; u_int8_t pmi; #define SRC_PMI 0x01 u_int8_t control; }; struct scsi_read_capacity_16 { uint8_t opcode; #define SRC16_SERVICE_ACTION 0x10 uint8_t service_action; uint8_t addr[8]; uint8_t alloc_len[4]; #define SRC16_PMI 0x01 #define SRC16_RELADR 0x02 uint8_t reladr; uint8_t control; }; struct scsi_read_capacity_data { u_int8_t addr[4]; u_int8_t length[4]; }; struct scsi_read_capacity_data_long { uint8_t addr[8]; uint8_t length[4]; #define SRC16_PROT_EN 0x01 #define SRC16_P_TYPE 0x0e #define SRC16_PTYPE_1 0x00 #define SRC16_PTYPE_2 0x02 #define SRC16_PTYPE_3 0x04 uint8_t prot; #define SRC16_LBPPBE 0x0f #define SRC16_PI_EXPONENT 0xf0 #define SRC16_PI_EXPONENT_SHIFT 4 uint8_t prot_lbppbe; #define SRC16_LALBA 0x3f #define SRC16_LBPRZ 0x40 #define SRC16_LBPME 0x80 /* * Alternate versions of these macros that are intended for use on a 16-bit * version of the lalba_lbp field instead of the array of 2 8 bit numbers. 
*/ #define SRC16_LALBA_A 0x3fff #define SRC16_LBPRZ_A 0x4000 #define SRC16_LBPME_A 0x8000 uint8_t lalba_lbp[2]; uint8_t reserved[16]; }; struct scsi_get_lba_status { uint8_t opcode; #define SGLS_SERVICE_ACTION 0x12 uint8_t service_action; uint8_t addr[8]; uint8_t alloc_len[4]; uint8_t reserved; uint8_t control; }; struct scsi_get_lba_status_data_descr { uint8_t addr[8]; uint8_t length[4]; uint8_t status; uint8_t reserved[3]; }; struct scsi_get_lba_status_data { uint8_t length[4]; uint8_t reserved[4]; struct scsi_get_lba_status_data_descr descr[]; }; struct scsi_report_luns { uint8_t opcode; uint8_t reserved1; #define RPL_REPORT_DEFAULT 0x00 #define RPL_REPORT_WELLKNOWN 0x01 #define RPL_REPORT_ALL 0x02 uint8_t select_report; uint8_t reserved2[3]; uint8_t length[4]; uint8_t reserved3; uint8_t control; }; struct scsi_report_luns_lundata { uint8_t lundata[8]; #define RPL_LUNDATA_PERIPH_BUS_MASK 0x3f #define RPL_LUNDATA_FLAT_LUN_MASK 0x3f #define RPL_LUNDATA_FLAT_LUN_BITS 0x06 #define RPL_LUNDATA_LUN_TARG_MASK 0x3f #define RPL_LUNDATA_LUN_BUS_MASK 0xe0 #define RPL_LUNDATA_LUN_LUN_MASK 0x1f #define RPL_LUNDATA_EXT_LEN_MASK 0x30 #define RPL_LUNDATA_EXT_EAM_MASK 0x0f #define RPL_LUNDATA_EXT_EAM_WK 0x01 #define RPL_LUNDATA_EXT_EAM_NOT_SPEC 0x0f #define RPL_LUNDATA_ATYP_MASK 0xc0 /* MBZ for type 0 lun */ #define RPL_LUNDATA_ATYP_PERIPH 0x00 #define RPL_LUNDATA_ATYP_FLAT 0x40 #define RPL_LUNDATA_ATYP_LUN 0x80 #define RPL_LUNDATA_ATYP_EXTLUN 0xc0 }; struct scsi_report_luns_data { u_int8_t length[4]; /* length of LUN inventory, in bytes */ u_int8_t reserved[4]; /* unused */ /* * LUN inventory- we only support the type zero form for now. */ struct scsi_report_luns_lundata luns[0]; }; struct scsi_target_group { uint8_t opcode; uint8_t service_action; #define STG_PDF_MASK 0xe0 #define STG_PDF_LENGTH 0x00 #define STG_PDF_EXTENDED 0x20 uint8_t reserved1[4]; uint8_t length[4]; uint8_t reserved2; uint8_t control; }; struct scsi_target_port_descriptor { uint8_t reserved[2]; uint8_t relative_target_port_identifier[2]; uint8_t desc_list[]; }; struct scsi_target_port_group_descriptor { uint8_t pref_state; #define TPG_PRIMARY 0x80 #define TPG_ASYMMETRIC_ACCESS_STATE_MASK 0xf #define TPG_ASYMMETRIC_ACCESS_OPTIMIZED 0x0 #define TPG_ASYMMETRIC_ACCESS_NONOPTIMIZED 0x1 #define TPG_ASYMMETRIC_ACCESS_STANDBY 0x2 #define TPG_ASYMMETRIC_ACCESS_UNAVAILABLE 0x3 #define TPG_ASYMMETRIC_ACCESS_LBA_DEPENDENT 0x4 #define TPG_ASYMMETRIC_ACCESS_OFFLINE 0xE #define TPG_ASYMMETRIC_ACCESS_TRANSITIONING 0xF uint8_t support; #define TPG_AO_SUP 0x01 #define TPG_AN_SUP 0x02 #define TPG_S_SUP 0x04 #define TPG_U_SUP 0x08 #define TPG_LBD_SUP 0x10 #define TPG_O_SUP 0x40 #define TPG_T_SUP 0x80 uint8_t target_port_group[2]; uint8_t reserved; uint8_t status; #define TPG_UNAVLBL 0 #define TPG_SET_BY_STPG 0x01 #define TPG_IMPLICIT 0x02 uint8_t vendor_specific; uint8_t target_port_count; struct scsi_target_port_descriptor descriptors[]; }; struct scsi_target_group_data { uint8_t length[4]; /* length of returned data, in bytes */ struct scsi_target_port_group_descriptor groups[]; }; struct scsi_target_group_data_extended { uint8_t length[4]; /* length of returned data, in bytes */ uint8_t format_type; /* STG_PDF_LENGTH or STG_PDF_EXTENDED */ uint8_t implicit_transition_time; uint8_t reserved[2]; struct scsi_target_port_group_descriptor groups[]; }; struct scsi_security_protocol_in { uint8_t opcode; uint8_t security_protocol; #define SPI_PROT_INFORMATION 0x00 #define SPI_PROT_CBCS 0x07 #define SPI_PROT_TAPE_DATA_ENC 0x20 #define 
SPI_PROT_DATA_ENC_CONFIG 0x21 #define SPI_PROT_SA_CREATE_CAP 0x40 #define SPI_PROT_IKEV2_SCSI 0x41 #define SPI_PROT_JEDEC_UFS 0xEC #define SPI_PROT_SDCARD_TFSSS 0xED #define SPI_PROT_AUTH_HOST_TRANSIENT 0xEE #define SPI_PROT_ATA_DEVICE_PASSWORD 0xEF uint8_t security_protocol_specific[2]; uint8_t byte4; #define SPI_INC_512 0x80 uint8_t reserved1; uint8_t length[4]; uint8_t reserved2; uint8_t control; }; struct scsi_security_protocol_out { uint8_t opcode; uint8_t security_protocol; uint8_t security_protocol_specific[2]; uint8_t byte4; #define SPO_INC_512 0x80 uint8_t reserved1; uint8_t length[4]; uint8_t reserved2; uint8_t control; }; typedef enum { SSD_TYPE_NONE, SSD_TYPE_FIXED, SSD_TYPE_DESC } scsi_sense_data_type; typedef enum { SSD_ELEM_NONE, SSD_ELEM_SKIP, SSD_ELEM_DESC, SSD_ELEM_SKS, SSD_ELEM_COMMAND, SSD_ELEM_INFO, SSD_ELEM_FRU, SSD_ELEM_STREAM, SSD_ELEM_MAX } scsi_sense_elem_type; struct scsi_sense_data { uint8_t error_code; /* * SPC-4 says that the maximum length of sense data is 252 bytes. * So this structure is exactly 252 bytes long. */ #define SSD_FULL_SIZE 252 uint8_t sense_buf[SSD_FULL_SIZE - 1]; /* * XXX KDM is this still a reasonable minimum size? */ #define SSD_MIN_SIZE 18 /* * Maximum value for the extra_len field in the sense data. */ #define SSD_EXTRA_MAX 244 }; /* * Fixed format sense data. */ struct scsi_sense_data_fixed { u_int8_t error_code; #define SSD_ERRCODE 0x7F #define SSD_CURRENT_ERROR 0x70 #define SSD_DEFERRED_ERROR 0x71 #define SSD_ERRCODE_VALID 0x80 u_int8_t segment; u_int8_t flags; #define SSD_KEY 0x0F #define SSD_KEY_NO_SENSE 0x00 #define SSD_KEY_RECOVERED_ERROR 0x01 #define SSD_KEY_NOT_READY 0x02 #define SSD_KEY_MEDIUM_ERROR 0x03 #define SSD_KEY_HARDWARE_ERROR 0x04 #define SSD_KEY_ILLEGAL_REQUEST 0x05 #define SSD_KEY_UNIT_ATTENTION 0x06 #define SSD_KEY_DATA_PROTECT 0x07 #define SSD_KEY_BLANK_CHECK 0x08 #define SSD_KEY_Vendor_Specific 0x09 #define SSD_KEY_COPY_ABORTED 0x0a #define SSD_KEY_ABORTED_COMMAND 0x0b #define SSD_KEY_EQUAL 0x0c #define SSD_KEY_VOLUME_OVERFLOW 0x0d #define SSD_KEY_MISCOMPARE 0x0e #define SSD_KEY_COMPLETED 0x0f #define SSD_ILI 0x20 #define SSD_EOM 0x40 #define SSD_FILEMARK 0x80 u_int8_t info[4]; u_int8_t extra_len; u_int8_t cmd_spec_info[4]; u_int8_t add_sense_code; u_int8_t add_sense_code_qual; u_int8_t fru; u_int8_t sense_key_spec[3]; #define SSD_SCS_VALID 0x80 #define SSD_FIELDPTR_CMD 0x40 #define SSD_BITPTR_VALID 0x08 #define SSD_BITPTR_VALUE 0x07 u_int8_t extra_bytes[14]; #define SSD_FIXED_IS_PRESENT(sense, length, field) \ ((length >= (offsetof(struct scsi_sense_data_fixed, field) + \ sizeof(sense->field))) ? 1 :0) #define SSD_FIXED_IS_FILLED(sense, field) \ ((((offsetof(struct scsi_sense_data_fixed, field) + \ sizeof(sense->field)) - \ (offsetof(struct scsi_sense_data_fixed, extra_len) + \ sizeof(sense->extra_len))) <= sense->extra_len) ? 1 : 0) }; /* * Descriptor format sense data definitions. * Introduced in SPC-3. */ struct scsi_sense_data_desc { uint8_t error_code; #define SSD_DESC_CURRENT_ERROR 0x72 #define SSD_DESC_DEFERRED_ERROR 0x73 uint8_t sense_key; uint8_t add_sense_code; uint8_t add_sense_code_qual; uint8_t reserved[3]; /* * Note that SPC-4, section 4.5.2.1 says that the extra_len field * must be less than or equal to 244. */ uint8_t extra_len; uint8_t sense_desc[0]; #define SSD_DESC_IS_PRESENT(sense, length, field) \ ((length >= (offsetof(struct scsi_sense_data_desc, field) + \ sizeof(sense->field))) ? 1 :0) };
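/*
 * A minimal usage sketch (hypothetical helper, not part of the header
 * proper): fixed and descriptor format sense are told apart by the
 * error code field, using the SSD_* codes defined above.
 */
static inline scsi_sense_data_type
example_sense_format(struct scsi_sense_data *sense_data)
{
	switch (sense_data->error_code & SSD_ERRCODE) {
	case SSD_CURRENT_ERROR:		/* 0x70: fixed, current */
	case SSD_DEFERRED_ERROR:	/* 0x71: fixed, deferred */
		return (SSD_TYPE_FIXED);
	case SSD_DESC_CURRENT_ERROR:	/* 0x72: descriptor, current */
	case SSD_DESC_DEFERRED_ERROR:	/* 0x73: descriptor, deferred */
		return (SSD_TYPE_DESC);
	default:
		return (SSD_TYPE_NONE);
	}
}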
struct scsi_sense_desc_header { uint8_t desc_type; uint8_t length; }; /* * The information provided in the Information descriptor is device type or * command specific information, and is defined in a command standard. * * Note that any changes to the field names or positions in this structure, * even reserved fields, should be accompanied by an examination of the * code in ctl_set_sense() that uses them. * * Maximum descriptors allowed: 1 (as of SPC-4) */ struct scsi_sense_info { uint8_t desc_type; #define SSD_DESC_INFO 0x00 uint8_t length; uint8_t byte2; #define SSD_INFO_VALID 0x80 uint8_t reserved; uint8_t info[8]; }; /* * Command-specific information depends on the command for which the * reported condition occurred. * * Note that any changes to the field names or positions in this structure, * even reserved fields, should be accompanied by an examination of the * code in ctl_set_sense() that uses them. * * Maximum descriptors allowed: 1 (as of SPC-4) */ struct scsi_sense_command { uint8_t desc_type; #define SSD_DESC_COMMAND 0x01 uint8_t length; uint8_t reserved[2]; uint8_t command_info[8]; }; /* * Sense key specific descriptor. The sense key specific data format * depends on the sense key in question. * * Maximum descriptors allowed: 1 (as of SPC-4) */ struct scsi_sense_sks { uint8_t desc_type; #define SSD_DESC_SKS 0x02 uint8_t length; uint8_t reserved1[2]; uint8_t sense_key_spec[3]; #define SSD_SKS_VALID 0x80 uint8_t reserved2; }; /* * This is used for the Illegal Request sense key (0x05) only. */ struct scsi_sense_sks_field { uint8_t byte0; #define SSD_SKS_FIELD_VALID 0x80 #define SSD_SKS_FIELD_CMD 0x40 #define SSD_SKS_BPV 0x08 #define SSD_SKS_BIT_VALUE 0x07 uint8_t field[2]; }; /* * This is used for the Hardware Error (0x04), Medium Error (0x03) and * Recovered Error (0x01) sense keys. */ struct scsi_sense_sks_retry { uint8_t byte0; #define SSD_SKS_RETRY_VALID 0x80 uint8_t actual_retry_count[2]; }; /* * Used with the No Sense (0x00) or Not Ready (0x02) sense keys. */ struct scsi_sense_sks_progress { uint8_t byte0; #define SSD_SKS_PROGRESS_VALID 0x80 uint8_t progress[2]; #define SSD_SKS_PROGRESS_DENOM 0x10000 }; /* * Used with the Copy Aborted (0x0a) sense key. */ struct scsi_sense_sks_segment { uint8_t byte0; #define SSD_SKS_SEGMENT_VALID 0x80 #define SSD_SKS_SEGMENT_SD 0x20 #define SSD_SKS_SEGMENT_BPV 0x08 #define SSD_SKS_SEGMENT_BITPTR 0x07 uint8_t field[2]; }; /* * Used with the Unit Attention (0x06) sense key. * * This is currently used to indicate that the unit attention condition * queue has overflowed (when the overflow bit is set). */ struct scsi_sense_sks_overflow { uint8_t byte0; #define SSD_SKS_OVERFLOW_VALID 0x80 #define SSD_SKS_OVERFLOW_SET 0x01 uint8_t reserved[2]; }; /* * This specifies which component is associated with the sense data. There * is no standard meaning for the fru value. * * Maximum descriptors allowed: 1 (as of SPC-4) */ struct scsi_sense_fru { uint8_t desc_type; #define SSD_DESC_FRU 0x03 uint8_t length; uint8_t reserved; uint8_t fru; }; /* * Used for Stream commands, defined in SSC-4. * * Maximum descriptors allowed: 1 (as of SPC-4) */ struct scsi_sense_stream { uint8_t desc_type; #define SSD_DESC_STREAM 0x04 uint8_t length; uint8_t reserved; uint8_t byte3; #define SSD_DESC_STREAM_FM 0x80 #define SSD_DESC_STREAM_EOM 0x40 #define SSD_DESC_STREAM_ILI 0x20 };
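/*
 * A minimal usage sketch (hypothetical helper, not part of the header
 * proper): the progress indication above is a numerator with an implied
 * denominator of SSD_SKS_PROGRESS_DENOM (65536), so percent complete
 * falls out of a scaled division via the scsi_2btoul() helper this
 * header provides.
 */
static inline uint32_t
example_sks_progress_percent(struct scsi_sense_sks_progress *sks)
{
	if ((sks->byte0 & SSD_SKS_PROGRESS_VALID) == 0)
		return (0);	/* no progress indication reported */
	return (scsi_2btoul(sks->progress) * 100 / SSD_SKS_PROGRESS_DENOM);
}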
* * This is currently (as of SBC-3) only used for the Incorrect Length * Indication (ILI) bit, which says that the data length requested in the * READ LONG or WRITE LONG command did not match the length of the logical * block. * * Maximum descriptors allowed: 1 (as of SPC-4) */ struct scsi_sense_block { uint8_t desc_type; #define SSD_DESC_BLOCK 0x05 uint8_t length; uint8_t reserved; uint8_t byte3; #define SSD_DESC_BLOCK_ILI 0x20 }; /* * Used for Object-Based Storage Devices (OSD-3). * * Maximum descriptors allowed: 1 (as of SPC-4) */ struct scsi_sense_osd_objid { uint8_t desc_type; #define SSD_DESC_OSD_OBJID 0x06 uint8_t length; uint8_t reserved[6]; /* * XXX KDM provide the bit definitions here? There are a lot of * them, and we don't have an OSD driver yet. */ uint8_t not_init_cmds[4]; uint8_t completed_cmds[4]; uint8_t partition_id[8]; uint8_t object_id[8]; }; /* * Used for Object-Based Storage Devices (OSD-3). * * Maximum descriptors allowed: 1 (as of SPC-4) */ struct scsi_sense_osd_integrity { uint8_t desc_type; #define SSD_DESC_OSD_INTEGRITY 0x07 uint8_t length; uint8_t integ_check_val[32]; }; /* * Used for Object-Based Storage Devices (OSD-3). * * Maximum descriptors allowed: 1 (as of SPC-4) */ struct scsi_sense_osd_attr_id { uint8_t desc_type; #define SSD_DESC_OSD_ATTR_ID 0x08 uint8_t length; uint8_t reserved[2]; uint8_t attr_desc[0]; }; /* * Used with Sense keys No Sense (0x00) and Not Ready (0x02). * * Maximum descriptors allowed: 32 (as of SPC-4) */ struct scsi_sense_progress { uint8_t desc_type; #define SSD_DESC_PROGRESS 0x0a uint8_t length; uint8_t sense_key; uint8_t add_sense_code; uint8_t add_sense_code_qual; uint8_t reserved; uint8_t progress[2]; }; /* * This is typically forwarded as the result of an EXTENDED COPY command. * * Maximum descriptors allowed: 2 (as of SPC-4) */ struct scsi_sense_forwarded { uint8_t desc_type; #define SSD_DESC_FORWARDED 0x0c uint8_t length; uint8_t byte2; #define SSD_FORWARDED_FSDT 0x80 #define SSD_FORWARDED_SDS_MASK 0x0f #define SSD_FORWARDED_SDS_UNK 0x00 #define SSD_FORWARDED_SDS_EXSRC 0x01 #define SSD_FORWARDED_SDS_EXDST 0x02 }; /* * Vendor-specific sense descriptor. The desc_type field will be in the * range between MIN and MAX inclusive.
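 */

/*
 * Editor's illustration, not part of the original header: a minimal,
 * hypothetical sketch of how the variable-length descriptor list that
 * follows struct scsi_sense_data_desc is laid out, using only the
 * definitions above.  The header's own scsi_find_desc() (declared below)
 * is the real interface; this sketch only shows the layout rule that each
 * descriptor begins with a scsi_sense_desc_header whose length field
 * counts the bytes following the two-byte header.
 */
static __inline struct scsi_sense_desc_header *
example_find_desc(struct scsi_sense_data_desc *sense, uint8_t desc_type)
{
	uint8_t *cur, *end;
	struct scsi_sense_desc_header *hdr;

	cur = sense->sense_desc;
	end = cur + sense->extra_len;	/* extra_len bounds the list */
	while (cur + sizeof(*hdr) <= end) {
		hdr = (struct scsi_sense_desc_header *)cur;
		if (hdr->desc_type == desc_type)
			return (hdr);
		cur += sizeof(*hdr) + hdr->length;
	}
	return (NULL);
}

/*
 * (The vendor-specific sense descriptor declaration follows.)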
*/ struct scsi_sense_vendor { uint8_t desc_type; #define SSD_DESC_VENDOR_MIN 0x80 #define SSD_DESC_VENDOR_MAX 0xff uint8_t length; uint8_t data[0]; }; struct scsi_mode_header_6 { u_int8_t data_length; /* Sense data length */ u_int8_t medium_type; u_int8_t dev_spec; u_int8_t blk_desc_len; }; struct scsi_mode_header_10 { u_int8_t data_length[2];/* Sense data length */ u_int8_t medium_type; u_int8_t dev_spec; u_int8_t unused[2]; u_int8_t blk_desc_len[2]; }; struct scsi_mode_page_header { u_int8_t page_code; #define SMPH_PS 0x80 #define SMPH_SPF 0x40 #define SMPH_PC_MASK 0x3f u_int8_t page_length; }; struct scsi_mode_page_header_sp { uint8_t page_code; uint8_t subpage; uint8_t page_length[2]; }; struct scsi_mode_blk_desc { u_int8_t density; u_int8_t nblocks[3]; u_int8_t reserved; u_int8_t blklen[3]; }; #define SCSI_DEFAULT_DENSITY 0x00 /* use 'default' density */ #define SCSI_SAME_DENSITY 0x7f /* use 'same' density- >= SCSI-2 only */ /* * Status Byte */ #define SCSI_STATUS_OK 0x00 #define SCSI_STATUS_CHECK_COND 0x02 #define SCSI_STATUS_COND_MET 0x04 #define SCSI_STATUS_BUSY 0x08 #define SCSI_STATUS_INTERMED 0x10 #define SCSI_STATUS_INTERMED_COND_MET 0x14 #define SCSI_STATUS_RESERV_CONFLICT 0x18 #define SCSI_STATUS_CMD_TERMINATED 0x22 /* Obsolete in SAM-2 */ #define SCSI_STATUS_QUEUE_FULL 0x28 #define SCSI_STATUS_ACA_ACTIVE 0x30 #define SCSI_STATUS_TASK_ABORTED 0x40 struct scsi_inquiry_pattern { u_int8_t type; u_int8_t media_type; #define SIP_MEDIA_REMOVABLE 0x01 #define SIP_MEDIA_FIXED 0x02 const char *vendor; const char *product; const char *revision; }; struct scsi_static_inquiry_pattern { u_int8_t type; u_int8_t media_type; char vendor[SID_VENDOR_SIZE+1]; char product[SID_PRODUCT_SIZE+1]; char revision[SID_REVISION_SIZE+1]; }; struct scsi_sense_quirk_entry { struct scsi_inquiry_pattern inq_pat; int num_sense_keys; int num_ascs; struct sense_key_table_entry *sense_key_info; struct asc_table_entry *asc_info; }; struct sense_key_table_entry { u_int8_t sense_key; u_int32_t action; const char *desc; }; struct asc_table_entry { u_int8_t asc; u_int8_t ascq; u_int32_t action; const char *desc; }; struct op_table_entry { u_int8_t opcode; u_int32_t opmask; const char *desc; }; struct scsi_op_quirk_entry { struct scsi_inquiry_pattern inq_pat; int num_ops; struct op_table_entry *op_table; }; typedef enum { SSS_FLAG_NONE = 0x00, SSS_FLAG_PRINT_COMMAND = 0x01 } scsi_sense_string_flags; struct scsi_nv { const char *name; uint64_t value; }; typedef enum { SCSI_NV_FOUND, SCSI_NV_AMBIGUOUS, SCSI_NV_NOT_FOUND } scsi_nv_status; typedef enum { SCSI_NV_FLAG_NONE = 0x00, SCSI_NV_FLAG_IG_CASE = 0x01 /* Case insensitive comparison */ } scsi_nv_flags; struct ccb_scsiio; struct cam_periph; union ccb; #ifndef _KERNEL struct cam_device; #endif extern const char *scsi_sense_key_text[]; __BEGIN_DECLS void scsi_sense_desc(int sense_key, int asc, int ascq, struct scsi_inquiry_data *inq_data, const char **sense_key_desc, const char **asc_desc); scsi_sense_action scsi_error_action(struct ccb_scsiio* csio, struct scsi_inquiry_data *inq_data, u_int32_t sense_flags); const char * scsi_status_string(struct ccb_scsiio *csio); void scsi_desc_iterate(struct scsi_sense_data_desc *sense, u_int sense_len, int (*iter_func)(struct scsi_sense_data_desc *sense, u_int, struct scsi_sense_desc_header *, void *), void *arg); uint8_t *scsi_find_desc(struct scsi_sense_data_desc *sense, u_int sense_len, uint8_t desc_type); void scsi_set_sense_data(struct scsi_sense_data *sense_data, scsi_sense_data_type sense_format, int current_error, int 
sense_key, int asc, int ascq, ...) ; void scsi_set_sense_data_va(struct scsi_sense_data *sense_data, scsi_sense_data_type sense_format, int current_error, int sense_key, int asc, int ascq, va_list ap); int scsi_get_sense_info(struct scsi_sense_data *sense_data, u_int sense_len, uint8_t info_type, uint64_t *info, int64_t *signed_info); int scsi_get_sks(struct scsi_sense_data *sense_data, u_int sense_len, uint8_t *sks); int scsi_get_block_info(struct scsi_sense_data *sense_data, u_int sense_len, struct scsi_inquiry_data *inq_data, uint8_t *block_bits); int scsi_get_stream_info(struct scsi_sense_data *sense_data, u_int sense_len, struct scsi_inquiry_data *inq_data, uint8_t *stream_bits); void scsi_info_sbuf(struct sbuf *sb, uint8_t *cdb, int cdb_len, struct scsi_inquiry_data *inq_data, uint64_t info); void scsi_command_sbuf(struct sbuf *sb, uint8_t *cdb, int cdb_len, struct scsi_inquiry_data *inq_data, uint64_t csi); void scsi_progress_sbuf(struct sbuf *sb, uint16_t progress); int scsi_sks_sbuf(struct sbuf *sb, int sense_key, uint8_t *sks); void scsi_fru_sbuf(struct sbuf *sb, uint64_t fru); void scsi_stream_sbuf(struct sbuf *sb, uint8_t stream_bits, uint64_t info); void scsi_block_sbuf(struct sbuf *sb, uint8_t block_bits, uint64_t info); void scsi_sense_info_sbuf(struct sbuf *sb, struct scsi_sense_data *sense, u_int sense_len, uint8_t *cdb, int cdb_len, struct scsi_inquiry_data *inq_data, struct scsi_sense_desc_header *header); void scsi_sense_command_sbuf(struct sbuf *sb, struct scsi_sense_data *sense, u_int sense_len, uint8_t *cdb, int cdb_len, struct scsi_inquiry_data *inq_data, struct scsi_sense_desc_header *header); void scsi_sense_sks_sbuf(struct sbuf *sb, struct scsi_sense_data *sense, u_int sense_len, uint8_t *cdb, int cdb_len, struct scsi_inquiry_data *inq_data, struct scsi_sense_desc_header *header); void scsi_sense_fru_sbuf(struct sbuf *sb, struct scsi_sense_data *sense, u_int sense_len, uint8_t *cdb, int cdb_len, struct scsi_inquiry_data *inq_data, struct scsi_sense_desc_header *header); void scsi_sense_stream_sbuf(struct sbuf *sb, struct scsi_sense_data *sense, u_int sense_len, uint8_t *cdb, int cdb_len, struct scsi_inquiry_data *inq_data, struct scsi_sense_desc_header *header); void scsi_sense_block_sbuf(struct sbuf *sb, struct scsi_sense_data *sense, u_int sense_len, uint8_t *cdb, int cdb_len, struct scsi_inquiry_data *inq_data, struct scsi_sense_desc_header *header); void scsi_sense_progress_sbuf(struct sbuf *sb, struct scsi_sense_data *sense, u_int sense_len, uint8_t *cdb, int cdb_len, struct scsi_inquiry_data *inq_data, struct scsi_sense_desc_header *header); void scsi_sense_generic_sbuf(struct sbuf *sb, struct scsi_sense_data *sense, u_int sense_len, uint8_t *cdb, int cdb_len, struct scsi_inquiry_data *inq_data, struct scsi_sense_desc_header *header); void scsi_sense_desc_sbuf(struct sbuf *sb, struct scsi_sense_data *sense, u_int sense_len, uint8_t *cdb, int cdb_len, struct scsi_inquiry_data *inq_data, struct scsi_sense_desc_header *header); scsi_sense_data_type scsi_sense_type(struct scsi_sense_data *sense_data); void scsi_sense_only_sbuf(struct scsi_sense_data *sense, u_int sense_len, struct sbuf *sb, char *path_str, struct scsi_inquiry_data *inq_data, uint8_t *cdb, int cdb_len); #ifdef _KERNEL int scsi_command_string(struct ccb_scsiio *csio, struct sbuf *sb); int scsi_sense_sbuf(struct ccb_scsiio *csio, struct sbuf *sb, scsi_sense_string_flags flags); char * scsi_sense_string(struct ccb_scsiio *csio, char *str, int str_len); void scsi_sense_print(struct ccb_scsiio 
*csio); int scsi_vpd_supported_page(struct cam_periph *periph, uint8_t page_id); #else /* _KERNEL */ int scsi_command_string(struct cam_device *device, struct ccb_scsiio *csio, struct sbuf *sb); int scsi_sense_sbuf(struct cam_device *device, struct ccb_scsiio *csio, struct sbuf *sb, scsi_sense_string_flags flags); char * scsi_sense_string(struct cam_device *device, struct ccb_scsiio *csio, char *str, int str_len); void scsi_sense_print(struct cam_device *device, struct ccb_scsiio *csio, FILE *ofile); #endif /* _KERNEL */ const char * scsi_op_desc(u_int16_t opcode, struct scsi_inquiry_data *inq_data); char * scsi_cdb_string(u_int8_t *cdb_ptr, char *cdb_string, size_t len); void scsi_print_inquiry(struct scsi_inquiry_data *inq_data); void scsi_print_inquiry_short(struct scsi_inquiry_data *inq_data); u_int scsi_calc_syncsrate(u_int period_factor); u_int scsi_calc_syncparam(u_int period); typedef int (*scsi_devid_checkfn_t)(uint8_t *); int scsi_devid_is_naa_ieee_reg(uint8_t *bufp); int scsi_devid_is_sas_target(uint8_t *bufp); int scsi_devid_is_lun_eui64(uint8_t *bufp); int scsi_devid_is_lun_naa(uint8_t *bufp); int scsi_devid_is_lun_name(uint8_t *bufp); int scsi_devid_is_lun_t10(uint8_t *bufp); struct scsi_vpd_id_descriptor * scsi_get_devid(struct scsi_vpd_device_id *id, uint32_t len, scsi_devid_checkfn_t ck_fn); struct scsi_vpd_id_descriptor * scsi_get_devid_desc(struct scsi_vpd_id_descriptor *desc, uint32_t len, scsi_devid_checkfn_t ck_fn); int scsi_transportid_sbuf(struct sbuf *sb, struct scsi_transportid_header *hdr, uint32_t valid_len); const char * scsi_nv_to_str(struct scsi_nv *table, int num_table_entries, uint64_t value); scsi_nv_status scsi_get_nv(struct scsi_nv *table, int num_table_entries, char *name, int *table_entry, scsi_nv_flags flags); int scsi_parse_transportid_64bit(int proto_id, char *id_str, struct scsi_transportid_header **hdr, unsigned int *alloc_len, #ifdef _KERNEL struct malloc_type *type, int flags, #endif char *error_str, int error_str_len); int scsi_parse_transportid_spi(char *id_str, struct scsi_transportid_header **hdr, unsigned int *alloc_len, #ifdef _KERNEL struct malloc_type *type, int flags, #endif char *error_str, int error_str_len); int scsi_parse_transportid_rdma(char *id_str, struct scsi_transportid_header **hdr, unsigned int *alloc_len, #ifdef _KERNEL struct malloc_type *type, int flags, #endif char *error_str, int error_str_len); int scsi_parse_transportid_iscsi(char *id_str, struct scsi_transportid_header **hdr, unsigned int *alloc_len, #ifdef _KERNEL struct malloc_type *type, int flags, #endif char *error_str,int error_str_len); int scsi_parse_transportid_sop(char *id_str, struct scsi_transportid_header **hdr, unsigned int *alloc_len, #ifdef _KERNEL struct malloc_type *type, int flags, #endif char *error_str,int error_str_len); int scsi_parse_transportid(char *transportid_str, struct scsi_transportid_header **hdr, unsigned int *alloc_len, #ifdef _KERNEL struct malloc_type *type, int flags, #endif char *error_str, int error_str_len); int scsi_attrib_volcoh_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr, uint32_t valid_len, uint32_t flags, uint32_t output_flags, char *error_str, int error_str_len); int scsi_attrib_vendser_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr, uint32_t valid_len, uint32_t flags, uint32_t output_flags, char *error_str, int error_str_len); int scsi_attrib_hexdump_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr, uint32_t valid_len, uint32_t flags, uint32_t output_flags, char *error_str, int 
error_str_len); int scsi_attrib_int_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr, uint32_t valid_len, uint32_t flags, uint32_t output_flags, char *error_str, int error_str_len); int scsi_attrib_ascii_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr, uint32_t valid_len, uint32_t flags, uint32_t output_flags, char *error_str, int error_str_len); int scsi_attrib_text_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr, uint32_t valid_len, uint32_t flags, uint32_t output_flags, char *error_str, int error_str_len); struct scsi_attrib_table_entry *scsi_find_attrib_entry( struct scsi_attrib_table_entry *table, size_t num_table_entries, uint32_t id); struct scsi_attrib_table_entry *scsi_get_attrib_entry(uint32_t id); int scsi_attrib_value_sbuf(struct sbuf *sb, uint32_t valid_len, struct scsi_mam_attribute_header *hdr, uint32_t output_flags, char *error_str, size_t error_str_len); void scsi_attrib_prefix_sbuf(struct sbuf *sb, uint32_t output_flags, struct scsi_mam_attribute_header *hdr, uint32_t valid_len, const char *desc); int scsi_attrib_sbuf(struct sbuf *sb, struct scsi_mam_attribute_header *hdr, uint32_t valid_len, struct scsi_attrib_table_entry *user_table, size_t num_user_entries, int prefer_user_table, uint32_t output_flags, char *error_str, int error_str_len); void scsi_test_unit_ready(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t sense_len, u_int32_t timeout); void scsi_request_sense(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), void *data_ptr, u_int8_t dxfer_len, u_int8_t tag_action, u_int8_t sense_len, u_int32_t timeout); void scsi_inquiry(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t *inq_buf, u_int32_t inq_len, int evpd, u_int8_t page_code, u_int8_t sense_len, u_int32_t timeout); void scsi_mode_sense(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, int dbd, u_int8_t page_code, u_int8_t page, u_int8_t *param_buf, u_int32_t param_len, u_int8_t sense_len, u_int32_t timeout); void scsi_mode_sense_len(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, int dbd, u_int8_t page_code, u_int8_t page, u_int8_t *param_buf, u_int32_t param_len, int minimum_cmd_size, u_int8_t sense_len, u_int32_t timeout); void scsi_mode_select(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, int scsi_page_fmt, int save_pages, u_int8_t *param_buf, u_int32_t param_len, u_int8_t sense_len, u_int32_t timeout); void scsi_mode_select_len(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, int scsi_page_fmt, int save_pages, u_int8_t *param_buf, u_int32_t param_len, int minimum_cmd_size, u_int8_t sense_len, u_int32_t timeout); void scsi_log_sense(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t page_code, u_int8_t page, int save_pages, int ppc, u_int32_t paramptr, u_int8_t *param_buf, u_int32_t param_len, u_int8_t sense_len, u_int32_t timeout); void scsi_log_select(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t page_code, int save_pages, int pc_reset, u_int8_t *param_buf, u_int32_t 
param_len, u_int8_t sense_len, u_int32_t timeout); void scsi_prevent(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t action, u_int8_t sense_len, u_int32_t timeout); void scsi_read_capacity(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, struct scsi_read_capacity_data *, u_int8_t sense_len, u_int32_t timeout); void scsi_read_capacity_16(struct ccb_scsiio *csio, uint32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), uint8_t tag_action, uint64_t lba, int reladr, int pmi, uint8_t *rcap_buf, int rcap_buf_len, uint8_t sense_len, uint32_t timeout); void scsi_report_luns(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t select_report, struct scsi_report_luns_data *rpl_buf, u_int32_t alloc_len, u_int8_t sense_len, u_int32_t timeout); void scsi_report_target_group(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t pdf, void *buf, u_int32_t alloc_len, u_int8_t sense_len, u_int32_t timeout); void scsi_set_target_group(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, void *buf, u_int32_t alloc_len, u_int8_t sense_len, u_int32_t timeout); void scsi_synchronize_cache(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int32_t begin_lba, u_int16_t lb_count, u_int8_t sense_len, u_int32_t timeout); void scsi_receive_diagnostic_results(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb*), uint8_t tag_action, int pcv, uint8_t page_code, uint8_t *data_ptr, uint16_t allocation_length, uint8_t sense_len, uint32_t timeout); void scsi_send_diagnostic(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), uint8_t tag_action, int unit_offline, int device_offline, int self_test, int page_format, int self_test_code, uint8_t *data_ptr, uint16_t param_list_length, uint8_t sense_len, uint32_t timeout); void scsi_read_buffer(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb*), uint8_t tag_action, int mode, uint8_t buffer_id, u_int32_t offset, uint8_t *data_ptr, uint32_t allocation_length, uint8_t sense_len, uint32_t timeout); void scsi_write_buffer(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), uint8_t tag_action, int mode, uint8_t buffer_id, u_int32_t offset, uint8_t *data_ptr, uint32_t param_list_length, uint8_t sense_len, uint32_t timeout); #define SCSI_RW_READ 0x0001 #define SCSI_RW_WRITE 0x0002 #define SCSI_RW_DIRMASK 0x0003 #define SCSI_RW_BIO 0x1000 void scsi_read_write(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, int readop, u_int8_t byte2, int minimum_cmd_size, u_int64_t lba, u_int32_t block_count, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int8_t sense_len, u_int32_t timeout); void scsi_write_same(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t byte2, int minimum_cmd_size, u_int64_t lba, u_int32_t block_count, u_int8_t *data_ptr, u_int32_t dxfer_len, u_int8_t sense_len, u_int32_t timeout); void scsi_ata_identify(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct 
cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t *data_ptr, u_int16_t dxfer_len, u_int8_t sense_len, u_int32_t timeout); void scsi_ata_trim(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int16_t block_count, u_int8_t *data_ptr, u_int16_t dxfer_len, u_int8_t sense_len, u_int32_t timeout); void scsi_ata_pass_16(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int32_t flags, u_int8_t tag_action, u_int8_t protocol, u_int8_t ata_flags, u_int16_t features, u_int16_t sector_count, uint64_t lba, u_int8_t command, u_int8_t control, u_int8_t *data_ptr, u_int16_t dxfer_len, u_int8_t sense_len, u_int32_t timeout); void scsi_unmap(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t byte2, u_int8_t *data_ptr, u_int16_t dxfer_len, u_int8_t sense_len, u_int32_t timeout); void scsi_start_stop(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, int start, int load_eject, int immediate, u_int8_t sense_len, u_int32_t timeout); void scsi_read_attribute(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, u_int8_t service_action, uint32_t element, u_int8_t elem_type, int logical_volume, int partition, u_int32_t first_attribute, int cache, u_int8_t *data_ptr, u_int32_t length, int sense_len, u_int32_t timeout); void scsi_write_attribute(struct ccb_scsiio *csio, u_int32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), u_int8_t tag_action, uint32_t element, int logical_volume, int partition, int wtc, u_int8_t *data_ptr, u_int32_t length, int sense_len, u_int32_t timeout); void scsi_security_protocol_in(struct ccb_scsiio *csio, uint32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), uint8_t tag_action, uint32_t security_protocol, uint32_t security_protocol_specific, int byte4, uint8_t *data_ptr, uint32_t dxfer_len, int sense_len, int timeout); void scsi_security_protocol_out(struct ccb_scsiio *csio, uint32_t retries, void (*cbfcnp)(struct cam_periph *,union ccb *), uint8_t tag_action, uint32_t security_protocol, uint32_t security_protocol_specific, int byte4, uint8_t *data_ptr, uint32_t dxfer_len, int sense_len, int timeout); void scsi_persistent_reserve_in(struct ccb_scsiio *csio, uint32_t retries, void (*cbfcnp)(struct cam_periph *,union ccb *), uint8_t tag_action, int service_action, uint8_t *data_ptr, uint32_t dxfer_len, int sense_len, int timeout); void scsi_persistent_reserve_out(struct ccb_scsiio *csio, uint32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), uint8_t tag_action, int service_action, int scope, int res_type, uint8_t *data_ptr, uint32_t dxfer_len, int sense_len, int timeout); void scsi_report_supported_opcodes(struct ccb_scsiio *csio, uint32_t retries, void (*cbfcnp)(struct cam_periph *, union ccb *), uint8_t tag_action, int options, int req_opcode, int req_service_action, uint8_t *data_ptr, uint32_t dxfer_len, int sense_len, int timeout); int scsi_inquiry_match(caddr_t inqbuffer, caddr_t table_entry); int scsi_static_inquiry_match(caddr_t inqbuffer, caddr_t table_entry); int scsi_devid_match(uint8_t *rhs, size_t rhs_len, uint8_t *lhs, size_t lhs_len); void scsi_extract_sense(struct scsi_sense_data *sense, int *error_code, int *sense_key, int *asc, int *ascq); int scsi_extract_sense_ccb(union ccb *ccb, int *error_code, int *sense_key, int 
*asc, int *ascq); void scsi_extract_sense_len(struct scsi_sense_data *sense, u_int sense_len, int *error_code, int *sense_key, int *asc, int *ascq, int show_errors); int scsi_get_sense_key(struct scsi_sense_data *sense, u_int sense_len, int show_errors); int scsi_get_asc(struct scsi_sense_data *sense, u_int sense_len, int show_errors); int scsi_get_ascq(struct scsi_sense_data *sense, u_int sense_len, int show_errors); static __inline void scsi_ulto2b(u_int32_t val, u_int8_t *bytes); static __inline void scsi_ulto3b(u_int32_t val, u_int8_t *bytes); static __inline void scsi_ulto4b(u_int32_t val, u_int8_t *bytes); static __inline void scsi_u64to8b(u_int64_t val, u_int8_t *bytes); static __inline uint32_t scsi_2btoul(const uint8_t *bytes); static __inline uint32_t scsi_3btoul(const uint8_t *bytes); static __inline int32_t scsi_3btol(const uint8_t *bytes); static __inline uint32_t scsi_4btoul(const uint8_t *bytes); static __inline uint64_t scsi_8btou64(const uint8_t *bytes); static __inline void *find_mode_page_6(struct scsi_mode_header_6 *mode_header); static __inline void *find_mode_page_10(struct scsi_mode_header_10 *mode_header); static __inline void scsi_ulto2b(u_int32_t val, u_int8_t *bytes) { bytes[0] = (val >> 8) & 0xff; bytes[1] = val & 0xff; } static __inline void scsi_ulto3b(u_int32_t val, u_int8_t *bytes) { bytes[0] = (val >> 16) & 0xff; bytes[1] = (val >> 8) & 0xff; bytes[2] = val & 0xff; } static __inline void scsi_ulto4b(u_int32_t val, u_int8_t *bytes) { bytes[0] = (val >> 24) & 0xff; bytes[1] = (val >> 16) & 0xff; bytes[2] = (val >> 8) & 0xff; bytes[3] = val & 0xff; } static __inline void scsi_u64to8b(u_int64_t val, u_int8_t *bytes) { bytes[0] = (val >> 56) & 0xff; bytes[1] = (val >> 48) & 0xff; bytes[2] = (val >> 40) & 0xff; bytes[3] = (val >> 32) & 0xff; bytes[4] = (val >> 24) & 0xff; bytes[5] = (val >> 16) & 0xff; bytes[6] = (val >> 8) & 0xff; bytes[7] = val & 0xff; } static __inline uint32_t scsi_2btoul(const uint8_t *bytes) { uint32_t rv; rv = (bytes[0] << 8) | bytes[1]; return (rv); } static __inline uint32_t scsi_3btoul(const uint8_t *bytes) { uint32_t rv; rv = (bytes[0] << 16) | (bytes[1] << 8) | bytes[2]; return (rv); } static __inline int32_t scsi_3btol(const uint8_t *bytes) { uint32_t rc = scsi_3btoul(bytes); if (rc & 0x00800000) rc |= 0xff000000; return (int32_t) rc; } static __inline uint32_t scsi_4btoul(const uint8_t *bytes) { uint32_t rv; rv = (bytes[0] << 24) | (bytes[1] << 16) | (bytes[2] << 8) | bytes[3]; return (rv); } static __inline uint64_t scsi_8btou64(const uint8_t *bytes) { uint64_t rv; rv = (((uint64_t)bytes[0]) << 56) | (((uint64_t)bytes[1]) << 48) | (((uint64_t)bytes[2]) << 40) | (((uint64_t)bytes[3]) << 32) | (((uint64_t)bytes[4]) << 24) | (((uint64_t)bytes[5]) << 16) | (((uint64_t)bytes[6]) << 8) | bytes[7]; return (rv); } /* * Given the pointer to a returned mode sense buffer, return a pointer to * the start of the first mode page. 
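 *
 * For example (an editor's sketch with a hypothetical buffer name, not
 * from the original header), after a successful 6-byte MODE SENSE into
 * "buf":
 *
 *	struct scsi_mode_header_6 *mh = (struct scsi_mode_header_6 *)buf;
 *	struct scsi_mode_page_header *pg = find_mode_page_6(mh);
 *
 * The helpers simply step past the mode header and any block descriptors
 * (blk_desc_len bytes) to reach the first mode page header.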
*/ static __inline void * find_mode_page_6(struct scsi_mode_header_6 *mode_header) { void *page_start; page_start = (void *)((u_int8_t *)&mode_header[1] + mode_header->blk_desc_len); return(page_start); } static __inline void * find_mode_page_10(struct scsi_mode_header_10 *mode_header) { void *page_start; page_start = (void *)((u_int8_t *)&mode_header[1] + scsi_2btoul(mode_header->blk_desc_len)); return(page_start); } __END_DECLS #endif /*_SCSI_SCSI_ALL_H*/ Index: projects/clang370-import/sys/dev/ahci/ahci_pci.c =================================================================== --- projects/clang370-import/sys/dev/ahci/ahci_pci.c (revision 288125) +++ projects/clang370-import/sys/dev/ahci/ahci_pci.c (revision 288126) @@ -1,624 +1,627 @@ /*- * Copyright (c) 2009-2012 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ahci.h" static int force_ahci = 1; TUNABLE_INT("hw.ahci.force", &force_ahci); static const struct { uint32_t id; uint8_t rev; const char *name; int quirks; } ahci_ids[] = { {0x43801002, 0x00, "AMD SB600", AHCI_Q_NOMSI | AHCI_Q_ATI_PMP_BUG | AHCI_Q_MAXIO_64K}, {0x43901002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43911002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43921002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43931002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, {0x43941002, 0x00, "AMD SB7x0/SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG | AHCI_Q_1MSI}, /* Not sure SB8x0/SB9x0 needs this quirk. 
Be conservative though */ {0x43951002, 0x00, "AMD SB8x0/SB9x0", AHCI_Q_ATI_PMP_BUG}, {0x78001022, 0x00, "AMD Hudson-2", 0}, {0x78011022, 0x00, "AMD Hudson-2", 0}, {0x78021022, 0x00, "AMD Hudson-2", 0}, {0x78031022, 0x00, "AMD Hudson-2", 0}, {0x78041022, 0x00, "AMD Hudson-2", 0}, {0x06111b21, 0x00, "ASMedia ASM2106", 0}, {0x06121b21, 0x00, "ASMedia ASM1061", 0}, {0x26528086, 0x00, "Intel ICH6", AHCI_Q_NOFORCE}, {0x26538086, 0x00, "Intel ICH6M", AHCI_Q_NOFORCE}, {0x26818086, 0x00, "Intel ESB2", 0}, {0x26828086, 0x00, "Intel ESB2", 0}, {0x26838086, 0x00, "Intel ESB2", 0}, {0x27c18086, 0x00, "Intel ICH7", 0}, {0x27c38086, 0x00, "Intel ICH7", 0}, {0x27c58086, 0x00, "Intel ICH7M", 0}, {0x27c68086, 0x00, "Intel ICH7M", 0}, {0x28218086, 0x00, "Intel ICH8", 0}, {0x28228086, 0x00, "Intel ICH8", 0}, {0x28248086, 0x00, "Intel ICH8", 0}, {0x28298086, 0x00, "Intel ICH8M", 0}, {0x282a8086, 0x00, "Intel ICH8M", 0}, {0x29228086, 0x00, "Intel ICH9", 0}, {0x29238086, 0x00, "Intel ICH9", 0}, {0x29248086, 0x00, "Intel ICH9", 0}, {0x29258086, 0x00, "Intel ICH9", 0}, {0x29278086, 0x00, "Intel ICH9", 0}, {0x29298086, 0x00, "Intel ICH9M", 0}, {0x292a8086, 0x00, "Intel ICH9M", 0}, {0x292b8086, 0x00, "Intel ICH9M", 0}, {0x292c8086, 0x00, "Intel ICH9M", 0}, {0x292f8086, 0x00, "Intel ICH9M", 0}, {0x294d8086, 0x00, "Intel ICH9", 0}, {0x294e8086, 0x00, "Intel ICH9M", 0}, {0x3a058086, 0x00, "Intel ICH10", 0}, {0x3a228086, 0x00, "Intel ICH10", 0}, {0x3a258086, 0x00, "Intel ICH10", 0}, {0x3b228086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b238086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b258086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b298086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b2c8086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x3b2f8086, 0x00, "Intel 5 Series/3400 Series", 0}, {0x1c028086, 0x00, "Intel Cougar Point", 0}, {0x1c038086, 0x00, "Intel Cougar Point", 0}, {0x1c048086, 0x00, "Intel Cougar Point", 0}, {0x1c058086, 0x00, "Intel Cougar Point", 0}, {0x1d028086, 0x00, "Intel Patsburg", 0}, {0x1d048086, 0x00, "Intel Patsburg", 0}, {0x1d068086, 0x00, "Intel Patsburg", 0}, {0x28268086, 0x00, "Intel Patsburg (RAID)", 0}, {0x1e028086, 0x00, "Intel Panther Point", 0}, {0x1e038086, 0x00, "Intel Panther Point", 0}, {0x1e048086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e058086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e068086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e078086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e0e8086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1e0f8086, 0x00, "Intel Panther Point (RAID)", 0}, {0x1f228086, 0x00, "Intel Avoton", 0}, {0x1f238086, 0x00, "Intel Avoton", 0}, {0x1f248086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f258086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f268086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f278086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f2e8086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f2f8086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f328086, 0x00, "Intel Avoton", 0}, {0x1f338086, 0x00, "Intel Avoton", 0}, {0x1f348086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f358086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f368086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f378086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f3e8086, 0x00, "Intel Avoton (RAID)", 0}, {0x1f3f8086, 0x00, "Intel Avoton (RAID)", 0}, {0x23a38086, 0x00, "Intel Coleto Creek", 0}, {0x28238086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x28278086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8c028086, 0x00, "Intel Lynx Point", 0}, {0x8c038086, 0x00, "Intel Lynx Point", 0}, {0x8c048086, 0x00, "Intel Lynx Point (RAID)", 0}, 
{0x8c058086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c068086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c078086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c0e8086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c0f8086, 0x00, "Intel Lynx Point (RAID)", 0}, {0x8c828086, 0x00, "Intel Wildcat Point", 0}, {0x8c838086, 0x00, "Intel Wildcat Point", 0}, {0x8c848086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c858086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c868086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c878086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c8e8086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8c8f8086, 0x00, "Intel Wildcat Point (RAID)", 0}, {0x8d028086, 0x00, "Intel Wellsburg", 0}, {0x8d048086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d068086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d628086, 0x00, "Intel Wellsburg", 0}, {0x8d648086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d668086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x8d6e8086, 0x00, "Intel Wellsburg (RAID)", 0}, {0x9c028086, 0x00, "Intel Lynx Point-LP", 0}, {0x9c038086, 0x00, "Intel Lynx Point-LP", 0}, {0x9c048086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c058086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c068086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c078086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c0e8086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x9c0f8086, 0x00, "Intel Lynx Point-LP (RAID)", 0}, {0x23238086, 0x00, "Intel DH89xxCC", 0}, {0x2360197b, 0x00, "JMicron JMB360", 0}, {0x2361197b, 0x00, "JMicron JMB361", AHCI_Q_NOFORCE}, {0x2362197b, 0x00, "JMicron JMB362", 0}, {0x2363197b, 0x00, "JMicron JMB363", AHCI_Q_NOFORCE}, {0x2365197b, 0x00, "JMicron JMB365", AHCI_Q_NOFORCE}, {0x2366197b, 0x00, "JMicron JMB366", AHCI_Q_NOFORCE}, {0x2368197b, 0x00, "JMicron JMB368", AHCI_Q_NOFORCE}, {0x611111ab, 0x00, "Marvell 88SE6111", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_1CH | AHCI_Q_EDGEIS}, {0x612111ab, 0x00, "Marvell 88SE6121", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_2CH | AHCI_Q_EDGEIS | AHCI_Q_NONCQ | AHCI_Q_NOCOUNT}, {0x614111ab, 0x00, "Marvell 88SE6141", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_4CH | AHCI_Q_EDGEIS | AHCI_Q_NONCQ | AHCI_Q_NOCOUNT}, {0x614511ab, 0x00, "Marvell 88SE6145", AHCI_Q_NOFORCE | AHCI_Q_NOPMP | AHCI_Q_4CH | AHCI_Q_EDGEIS | AHCI_Q_NONCQ | AHCI_Q_NOCOUNT}, {0x91201b4b, 0x00, "Marvell 88SE912x", AHCI_Q_EDGEIS}, {0x91231b4b, 0x11, "Marvell 88SE912x", AHCI_Q_ALTSIG}, {0x91231b4b, 0x00, "Marvell 88SE912x", AHCI_Q_EDGEIS|AHCI_Q_SATA2}, {0x91251b4b, 0x00, "Marvell 88SE9125", 0}, {0x91281b4b, 0x00, "Marvell 88SE9128", AHCI_Q_ALTSIG}, {0x91301b4b, 0x00, "Marvell 88SE9130", AHCI_Q_ALTSIG}, {0x91721b4b, 0x00, "Marvell 88SE9172", 0}, {0x91821b4b, 0x00, "Marvell 88SE9182", 0}, {0x91831b4b, 0x00, "Marvell 88SS9183", 0}, {0x91a01b4b, 0x00, "Marvell 88SE91Ax", 0}, {0x92151b4b, 0x00, "Marvell 88SE9215", 0}, {0x92201b4b, 0x00, "Marvell 88SE9220", AHCI_Q_ALTSIG}, {0x92301b4b, 0x00, "Marvell 88SE9230", AHCI_Q_ALTSIG}, {0x92351b4b, 0x00, "Marvell 88SE9235", 0}, {0x06201103, 0x00, "HighPoint RocketRAID 620", 0}, {0x06201b4b, 0x00, "HighPoint RocketRAID 620", 0}, {0x06221103, 0x00, "HighPoint RocketRAID 622", 0}, {0x06221b4b, 0x00, "HighPoint RocketRAID 622", 0}, {0x06401103, 0x00, "HighPoint RocketRAID 640", 0}, {0x06401b4b, 0x00, "HighPoint RocketRAID 640", 0}, {0x06441103, 0x00, "HighPoint RocketRAID 644", 0}, {0x06441b4b, 0x00, "HighPoint RocketRAID 644", 0}, {0x06411103, 0x00, "HighPoint RocketRAID 640L", 0}, {0x06421103, 0x00, "HighPoint RocketRAID 642L", 0}, {0x06451103, 0x00, "HighPoint RocketRAID 644L", 0}, 
{0x044c10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044d10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044e10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044f10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045c10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045d10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045e10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x045f10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x055010de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055110de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055210de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055310de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055410de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055510de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055610de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055710de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055810de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055910de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055A10de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x055B10de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x058410de, 0x00, "NVIDIA MCP67", AHCI_Q_NOAA}, {0x07f010de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f110de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f210de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f310de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f410de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f510de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f610de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f710de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f810de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07f910de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07fa10de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x07fb10de, 0x00, "NVIDIA MCP73", AHCI_Q_NOAA}, {0x0ad010de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad110de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad210de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad310de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad410de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad510de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad610de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad710de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad810de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ad910de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ada10de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0adb10de, 0x00, "NVIDIA MCP77", AHCI_Q_NOAA}, {0x0ab410de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab510de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab610de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab710de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab810de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0ab910de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0aba10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abb10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abc10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abd10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abe10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0abf10de, 0x00, "NVIDIA MCP79", AHCI_Q_NOAA}, {0x0d8410de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8510de, 0x00, "NVIDIA MCP89", AHCI_Q_NOFORCE|AHCI_Q_NOAA}, {0x0d8610de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8710de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8810de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8910de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8a10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8b10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8c10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8d10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8e10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x0d8f10de, 0x00, "NVIDIA MCP89", AHCI_Q_NOAA}, {0x3781105a, 0x00, "Promise TX8660", 0}, {0x33491106, 0x00, "VIA VT8251", AHCI_Q_NOPMP|AHCI_Q_NONCQ}, {0x62871106, 0x00, "VIA VT8251", 
AHCI_Q_NOPMP|AHCI_Q_NONCQ}, {0x11841039, 0x00, "SiS 966", 0}, {0x11851039, 0x00, "SiS 968", 0}, {0x01861039, 0x00, "SiS 968", 0}, {0xa01c177d, 0x00, "ThunderX", AHCI_Q_ABAR0|AHCI_Q_1MSI}, {0x00311c36, 0x00, "Annapurna", AHCI_Q_FORCE_PI|AHCI_Q_RESTORE_CAP}, {0x00000000, 0x00, NULL, 0} }; static int ahci_pci_ctlr_reset(device_t dev) { if (pci_read_config(dev, PCIR_DEVVENDOR, 4) == 0x28298086 && (pci_read_config(dev, 0x92, 1) & 0xfe) == 0x04) pci_write_config(dev, 0x92, 0x01, 1); return ahci_ctlr_reset(dev); } static int ahci_probe(device_t dev) { char buf[64]; int i, valid = 0; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); /* * Ensure it is not a PCI bridge (some vendors use * the same PID and VID in PCI bridge and AHCI cards). */ if (pci_get_class(dev) == PCIC_BRIDGE) return (ENXIO); /* Is this a possible AHCI candidate? */ if (pci_get_class(dev) == PCIC_STORAGE && pci_get_subclass(dev) == PCIS_STORAGE_SATA && pci_get_progif(dev) == PCIP_STORAGE_SATA_AHCI_1_0) valid = 1; + else if (pci_get_class(dev) == PCIC_STORAGE && + pci_get_subclass(dev) == PCIS_STORAGE_RAID) + valid = 2; /* Is this a known AHCI chip? */ for (i = 0; ahci_ids[i].id != 0; i++) { if (ahci_ids[i].id == devid && ahci_ids[i].rev <= revid && (valid || (force_ahci == 1 && !(ahci_ids[i].quirks & AHCI_Q_NOFORCE)))) { /* Do not attach JMicrons with single PCI function. */ if (pci_get_vendor(dev) == 0x197b && (pci_read_config(dev, 0xdf, 1) & 0x40) == 0) return (ENXIO); snprintf(buf, sizeof(buf), "%s AHCI SATA controller", ahci_ids[i].name); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } } - if (!valid) + if (valid != 1) return (ENXIO); device_set_desc_copy(dev, "AHCI SATA controller"); return (BUS_PROBE_DEFAULT); } static int ahci_ata_probe(device_t dev) { char buf[64]; int i; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); if ((intptr_t)device_get_ivars(dev) >= 0) return (ENXIO); /* Is this a known AHCI chip? */ for (i = 0; ahci_ids[i].id != 0; i++) { if (ahci_ids[i].id == devid && ahci_ids[i].rev <= revid) { snprintf(buf, sizeof(buf), "%s AHCI SATA controller", ahci_ids[i].name); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } } device_set_desc_copy(dev, "AHCI SATA controller"); return (BUS_PROBE_DEFAULT); } static int ahci_pci_read_msix_bars(device_t dev, uint8_t *table_bar, uint8_t *pba_bar) { int cap_offset = 0, ret; uint32_t val; if ((table_bar == NULL) || (pba_bar == NULL)) return (EINVAL); ret = pci_find_cap(dev, PCIY_MSIX, &cap_offset); if (ret != 0) return (EINVAL); val = pci_read_config(dev, cap_offset + PCIR_MSIX_TABLE, 4); *table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK); val = pci_read_config(dev, cap_offset + PCIR_MSIX_PBA, 4); *pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK); return (0); } static int ahci_pci_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); int error, i; uint32_t devid = pci_get_devid(dev); uint8_t revid = pci_get_revid(dev); int msi_count, msix_count; uint8_t table_bar = 0, pba_bar = 0; msi_count = pci_msi_count(dev); msix_count = pci_msix_count(dev); i = 0; while (ahci_ids[i].id != 0 && (ahci_ids[i].id != devid || ahci_ids[i].rev > revid)) i++; ctlr->quirks = ahci_ids[i].quirks; /* Limit speed for my onboard JMicron external port. 
* It is not eSATA really, limit to SATA 1 */ if (pci_get_devid(dev) == 0x2363197b && pci_get_subvendor(dev) == 0x1043 && pci_get_subdevice(dev) == 0x81e4) ctlr->quirks |= AHCI_Q_SATA1_UNIT0; ctlr->vendorid = pci_get_vendor(dev); ctlr->deviceid = pci_get_device(dev); ctlr->subvendorid = pci_get_subvendor(dev); ctlr->subdeviceid = pci_get_subdevice(dev); /* Default AHCI Base Address is BAR(5), Cavium uses BAR(0) */ if (ctlr->quirks & AHCI_Q_ABAR0) ctlr->r_rid = PCIR_BAR(0); else ctlr->r_rid = PCIR_BAR(5); if (!(ctlr->r_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ctlr->r_rid, RF_ACTIVE))) return ENXIO; /* Read MSI-x BAR IDs if supported */ if (msix_count > 0) { error = ahci_pci_read_msix_bars(dev, &table_bar, &pba_bar); if (error == 0) { ctlr->r_msix_tab_rid = table_bar; ctlr->r_msix_pba_rid = pba_bar; } else { /* Failed to read BARs, disable MSI-x */ msix_count = 0; } } /* Allocate resources for MSI-x table and PBA */ if (msix_count > 0) { /* * Allocate new MSI-x table only if not * allocated before. */ ctlr->r_msix_table = NULL; if (ctlr->r_msix_tab_rid != ctlr->r_rid) { /* Separate BAR for MSI-x */ ctlr->r_msix_table = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ctlr->r_msix_tab_rid, RF_ACTIVE); if (ctlr->r_msix_table == NULL) { ahci_free_mem(dev); return (ENXIO); } } /* * Allocate new PBA table only if not * allocated before. */ ctlr->r_msix_pba = NULL; if ((ctlr->r_msix_pba_rid != ctlr->r_msix_tab_rid) && (ctlr->r_msix_pba_rid != ctlr->r_rid)) { /* Separate BAR for PBA */ ctlr->r_msix_pba = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ctlr->r_msix_pba_rid, RF_ACTIVE); if (ctlr->r_msix_pba == NULL) { ahci_free_mem(dev); return (ENXIO); } } } pci_enable_busmaster(dev); /* Reset controller */ if ((error = ahci_pci_ctlr_reset(dev)) != 0) { ahci_free_mem(dev); return (error); }; /* Setup interrupts. */ /* Setup MSI register parameters */ /* Process hints. */ if (ctlr->quirks & AHCI_Q_NOMSI) ctlr->msi = 0; else if (ctlr->quirks & AHCI_Q_1MSI) ctlr->msi = 1; else ctlr->msi = 2; resource_int_value(device_get_name(dev), device_get_unit(dev), "msi", &ctlr->msi); ctlr->numirqs = 1; if (msi_count == 0 && msix_count == 0) ctlr->msi = 0; if (ctlr->msi < 0) ctlr->msi = 0; else if (ctlr->msi == 1) { msi_count = min(1, msi_count); msix_count = min(1, msix_count); } else if (ctlr->msi > 1) ctlr->msi = 2; /* Allocate MSI/MSI-x if needed/present. */ if (ctlr->msi > 0) { error = ENXIO; /* Try to allocate MSI-x first */ if (msix_count > 0) { error = pci_alloc_msix(dev, &msix_count); if (error == 0) ctlr->numirqs = msix_count; } /* * Try to allocate MSI if msi_count is greater than 0 * and if MSI-x allocation failed. 
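 *
 * (Editor's note: the net effect of this block is a three-step fallback,
 * MSI-x first, then MSI, then legacy INTx if both allocations fail; the
 * "msi" hint read above selects 0 (INTx), 1 (a single vector), or the
 * default 2 (as many MSI/MSI-x vectors as the device offers).)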
*/ if ((error != 0) && (msi_count > 0)) { error = pci_alloc_msi(dev, &msi_count); if (error == 0) ctlr->numirqs = msi_count; } /* Both MSI and MSI-x allocations failed */ if (error != 0) { ctlr->msi = 0; device_printf(dev, "Failed to allocate MSI/MSI-x, " "falling back to INTx\n"); } } error = ahci_attach(dev); if (error != 0) { if (ctlr->msi > 0) pci_release_msi(dev); ahci_free_mem(dev); } return error; } static int ahci_pci_detach(device_t dev) { ahci_detach(dev); pci_release_msi(dev); return (0); } static int ahci_pci_suspend(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); bus_generic_suspend(dev); /* Disable interrupts, so the state change(s) don't trigger */ ATA_OUTL(ctlr->r_mem, AHCI_GHC, ATA_INL(ctlr->r_mem, AHCI_GHC) & (~AHCI_GHC_IE)); return 0; } static int ahci_pci_resume(device_t dev) { int res; if ((res = ahci_pci_ctlr_reset(dev)) != 0) return (res); ahci_ctlr_setup(dev); return (bus_generic_resume(dev)); } devclass_t ahci_devclass; static device_method_t ahci_methods[] = { DEVMETHOD(device_probe, ahci_probe), DEVMETHOD(device_attach, ahci_pci_attach), DEVMETHOD(device_detach, ahci_pci_detach), DEVMETHOD(device_suspend, ahci_pci_suspend), DEVMETHOD(device_resume, ahci_pci_resume), DEVMETHOD(bus_print_child, ahci_print_child), DEVMETHOD(bus_alloc_resource, ahci_alloc_resource), DEVMETHOD(bus_release_resource, ahci_release_resource), DEVMETHOD(bus_setup_intr, ahci_setup_intr), DEVMETHOD(bus_teardown_intr,ahci_teardown_intr), DEVMETHOD(bus_child_location_str, ahci_child_location_str), DEVMETHOD(bus_get_dma_tag, ahci_get_dma_tag), DEVMETHOD_END }; static driver_t ahci_driver = { "ahci", ahci_methods, sizeof(struct ahci_controller) }; DRIVER_MODULE(ahci, pci, ahci_driver, ahci_devclass, NULL, NULL); static device_method_t ahci_ata_methods[] = { DEVMETHOD(device_probe, ahci_ata_probe), DEVMETHOD(device_attach, ahci_pci_attach), DEVMETHOD(device_detach, ahci_pci_detach), DEVMETHOD(device_suspend, ahci_pci_suspend), DEVMETHOD(device_resume, ahci_pci_resume), DEVMETHOD(bus_print_child, ahci_print_child), DEVMETHOD(bus_alloc_resource, ahci_alloc_resource), DEVMETHOD(bus_release_resource, ahci_release_resource), DEVMETHOD(bus_setup_intr, ahci_setup_intr), DEVMETHOD(bus_teardown_intr,ahci_teardown_intr), DEVMETHOD(bus_child_location_str, ahci_child_location_str), DEVMETHOD_END }; static driver_t ahci_ata_driver = { "ahci", ahci_ata_methods, sizeof(struct ahci_controller) }; DRIVER_MODULE(ahci, atapci, ahci_ata_driver, ahci_devclass, NULL, NULL); Index: projects/clang370-import/sys/dev/drm2/drm_lock.c =================================================================== --- projects/clang370-import/sys/dev/drm2/drm_lock.c (revision 288125) +++ projects/clang370-import/sys/dev/drm2/drm_lock.c (revision 288126) @@ -1,376 +1,380 @@ /** * \file drm_lock.c * IOCTLs for locking * * \author Rickard E. (Rik) Faith * \author Gareth Hughes */ /* * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com * * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include +#if defined(__linux__) static int drm_notifier(void *priv); +#endif static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context); /** * Lock ioctl. * * \param inode device inode. * \param file_priv DRM file private. * \param cmd command. * \param arg user argument, pointing to a drm_lock structure. * \return zero on success or negative number on failure. * * Add the current task to the lock wait queue, and attempt to take the lock. */ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_lock *lock = data; struct drm_master *master = file_priv->master; int ret = 0; ++file_priv->lock_count; if (lock->context == DRM_KERNEL_CONTEXT) { DRM_ERROR("Process %d using kernel context %d\n", DRM_CURRENTPID, lock->context); return -EINVAL; } DRM_DEBUG("%d (pid %d) requests lock (0x%08x), flags = 0x%08x\n", lock->context, DRM_CURRENTPID, master->lock.hw_lock->lock, lock->flags); mtx_lock(&master->lock.spinlock); master->lock.user_waiters++; mtx_unlock(&master->lock.spinlock); for (;;) { #if defined(__linux__) if (!master->lock.hw_lock) { /* Device has been unregistered */ send_sig(SIGTERM, current, 0); ret = -EINTR; break; } #endif if (drm_lock_take(&master->lock, lock->context)) { master->lock.file_priv = file_priv; master->lock.lock_time = jiffies; atomic_inc(&dev->counts[_DRM_STAT_LOCKS]); break; /* Got lock */ } /* Contention */ DRM_UNLOCK_ASSERT(dev); ret = -sx_sleep(&master->lock.lock_queue, &drm_global_mutex, PCATCH, "drmlk2", 0); if (ret == -ERESTART) ret = -ERESTARTSYS; if (ret != 0) break; } mtx_lock(&master->lock.spinlock); master->lock.user_waiters--; mtx_unlock(&master->lock.spinlock); DRM_DEBUG("%d %s\n", lock->context, ret ?
"interrupted" : "has lock"); if (ret) return ret; #if defined(__linux__) /* don't set the block all signals on the master process for now * really probably not the correct answer but lets us debug xkb * xserver for now */ if (!file_priv->is_master) { sigemptyset(&dev->sigmask); sigaddset(&dev->sigmask, SIGSTOP); sigaddset(&dev->sigmask, SIGTSTP); sigaddset(&dev->sigmask, SIGTTIN); sigaddset(&dev->sigmask, SIGTTOU); dev->sigdata.context = lock->context; dev->sigdata.lock = master->lock.hw_lock; block_all_signals(drm_notifier, &dev->sigdata, &dev->sigmask); } #endif if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT)) { if (dev->driver->dma_quiescent(dev)) { DRM_DEBUG("%d waiting for DMA quiescent\n", lock->context); return -EBUSY; } } return 0; } /** * Unlock ioctl. * * \param inode device inode. * \param file_priv DRM file private. * \param cmd command. * \param arg user argument, pointing to a drm_lock structure. * \return zero on success or negative number on failure. * * Transfer and free the lock. */ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_lock *lock = data; struct drm_master *master = file_priv->master; if (lock->context == DRM_KERNEL_CONTEXT) { DRM_ERROR("Process %d using kernel context %d\n", DRM_CURRENTPID, lock->context); return -EINVAL; } atomic_inc(&dev->counts[_DRM_STAT_UNLOCKS]); if (drm_lock_free(&master->lock, lock->context)) { /* FIXME: Should really bail out here. */ } #if defined(__linux__) unblock_all_signals(); #endif return 0; } /** * Take the heavyweight lock. * * \param lock lock pointer. * \param context locking context. * \return one if the lock is held, or zero otherwise. * * Attempt to mark the lock as held by the given context, via the \p cmpxchg instruction. */ static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context) { unsigned int old, new, prev; volatile unsigned int *lock = &lock_data->hw_lock->lock; mtx_lock(&lock_data->spinlock); do { old = *lock; if (old & _DRM_LOCK_HELD) new = old | _DRM_LOCK_CONT; else { new = context | _DRM_LOCK_HELD | ((lock_data->user_waiters + lock_data->kernel_waiters > 1) ? _DRM_LOCK_CONT : 0); } prev = cmpxchg(lock, old, new); } while (prev != old); mtx_unlock(&lock_data->spinlock); if (_DRM_LOCKING_CONTEXT(old) == context) { if (old & _DRM_LOCK_HELD) { if (context != DRM_KERNEL_CONTEXT) { DRM_ERROR("%d holds heavyweight lock\n", context); } return 0; } } if ((_DRM_LOCKING_CONTEXT(new)) == context && (new & _DRM_LOCK_HELD)) { /* Have lock */ return 1; } return 0; } /** * This takes a lock forcibly and hands it to context. Should ONLY be used * inside *_unlock to give lock to kernel before calling *_dma_schedule. * * \param dev DRM device. * \param lock lock pointer. * \param context locking context. * \return always one. * * Resets the lock file pointer. * Marks the lock as held by the given context, via the \p cmpxchg instruction. */ static int drm_lock_transfer(struct drm_lock_data *lock_data, unsigned int context) { unsigned int old, new, prev; volatile unsigned int *lock = &lock_data->hw_lock->lock; lock_data->file_priv = NULL; do { old = *lock; new = context | _DRM_LOCK_HELD; prev = cmpxchg(lock, old, new); } while (prev != old); return 1; } /** * Free lock. * * \param dev DRM device. * \param lock lock. * \param context context. * * Resets the lock file pointer. * Marks the lock as not held, via the \p cmpxchg instruction. Wakes any task * waiting on the lock queue. 
*/ int drm_lock_free(struct drm_lock_data *lock_data, unsigned int context) { unsigned int old, new, prev; volatile unsigned int *lock = &lock_data->hw_lock->lock; mtx_lock(&lock_data->spinlock); if (lock_data->kernel_waiters != 0) { drm_lock_transfer(lock_data, 0); lock_data->idle_has_lock = 1; mtx_unlock(&lock_data->spinlock); return 1; } mtx_unlock(&lock_data->spinlock); do { old = *lock; new = _DRM_LOCKING_CONTEXT(old); prev = cmpxchg(lock, old, new); } while (prev != old); if (_DRM_LOCK_IS_HELD(old) && _DRM_LOCKING_CONTEXT(old) != context) { DRM_ERROR("%d freed heavyweight lock held by %d\n", context, _DRM_LOCKING_CONTEXT(old)); return 1; } wake_up_interruptible(&lock_data->lock_queue); return 0; } +#if defined(__linux__) /** * If we get here, it means that the process has called DRM_IOCTL_LOCK * without calling DRM_IOCTL_UNLOCK. * * If the lock is not held, then let the signal proceed as usual. If the lock * is held, then set the contended flag and keep the signal blocked. * * \param priv pointer to a drm_sigdata structure. * \return one if the signal should be delivered normally, or zero if the * signal should be blocked. */ static int drm_notifier(void *priv) { struct drm_sigdata *s = (struct drm_sigdata *) priv; unsigned int old, new, prev; /* Allow signal delivery if lock isn't held */ if (!s->lock || !_DRM_LOCK_IS_HELD(s->lock->lock) || _DRM_LOCKING_CONTEXT(s->lock->lock) != s->context) return 1; /* Otherwise, set flag to force call to drmUnlock */ do { old = s->lock->lock; new = old | _DRM_LOCK_CONT; prev = cmpxchg(&s->lock->lock, old, new); } while (prev != old); return 0; } +#endif /** * This function returns immediately and takes the hw lock * with the kernel context if it is free, otherwise it gets the highest priority when and if * it is eventually released. * * This guarantees that the kernel will _eventually_ have the lock _unless_ it is held * by a blocked process. (In the latter case an explicit wait for the hardware lock would cause * a deadlock, which is why the "idlelock" was invented). * * This should be sufficient to wait for GPU idle without * having to worry about starvation. 
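 *
 * A hypothetical usage sketch (editor's illustration, not from the
 * original source):
 *
 *	drm_idlelock_take(&master->lock);
 *	... operate on an idle GPU ...
 *	drm_idlelock_release(&master->lock);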
*/ void drm_idlelock_take(struct drm_lock_data *lock_data) { int ret; mtx_lock(&lock_data->spinlock); lock_data->kernel_waiters++; if (!lock_data->idle_has_lock) { mtx_unlock(&lock_data->spinlock); ret = drm_lock_take(lock_data, DRM_KERNEL_CONTEXT); mtx_lock(&lock_data->spinlock); if (ret == 1) lock_data->idle_has_lock = 1; } mtx_unlock(&lock_data->spinlock); } EXPORT_SYMBOL(drm_idlelock_take); void drm_idlelock_release(struct drm_lock_data *lock_data) { unsigned int old, prev; volatile unsigned int *lock = &lock_data->hw_lock->lock; mtx_lock(&lock_data->spinlock); if (--lock_data->kernel_waiters == 0) { if (lock_data->idle_has_lock) { do { old = *lock; prev = cmpxchg(lock, old, DRM_KERNEL_CONTEXT); } while (prev != old); wake_up_interruptible(&lock_data->lock_queue); lock_data->idle_has_lock = 0; } } mtx_unlock(&lock_data->spinlock); } EXPORT_SYMBOL(drm_idlelock_release); int drm_i_have_hw_lock(struct drm_device *dev, struct drm_file *file_priv) { struct drm_master *master = file_priv->master; return (file_priv->lock_count && master->lock.hw_lock && _DRM_LOCK_IS_HELD(master->lock.hw_lock->lock) && master->lock.file_priv == file_priv); } Index: projects/clang370-import/sys/kern/vfs_bio.c =================================================================== --- projects/clang370-import/sys/kern/vfs_bio.c (revision 288125) +++ projects/clang370-import/sys/kern/vfs_bio.c (revision 288126) @@ -1,4645 +1,4651 @@ /*- * Copyright (c) 2004 Poul-Henning Kamp * Copyright (c) 1994,1997 John S. Dyson * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * this file contains a new buffer I/O scheme implementing a coherent * VM object and buffer cache scheme. Pains have been taken to make * sure that the performance degradation associated with schemes such * as this is not realized. * * Author: John S. Dyson * Significant help during the development and debugging phases * had been provided by David Greenman, also of the FreeBSD core team. * * see man buf(9) for more info. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_compat.h" #include "opt_swap.h" static MALLOC_DEFINE(M_BIOBUF, "biobuf", "BIO buffer"); struct bio_ops bioops; /* I/O operation notification */ struct buf_ops buf_ops_bio = { .bop_name = "buf_ops_bio", .bop_write = bufwrite, .bop_strategy = bufstrategy, .bop_sync = bufsync, .bop_bdflush = bufbdflush, }; static struct buf *buf; /* buffer header pool */ extern struct buf *swbuf; /* Swap buffer header pool. */ caddr_t unmapped_buf; /* Used below and for softdep flushing threads in ufs/ffs/ffs_softdep.c */ struct proc *bufdaemonproc; static int inmem(struct vnode *vp, daddr_t blkno); static void vm_hold_free_pages(struct buf *bp, int newbsize); static void vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to); static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m); static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, vm_page_t m); static void vfs_clean_pages_dirty_buf(struct buf *bp); static void vfs_setdirty_locked_object(struct buf *bp); static void vfs_vmio_release(struct buf *bp); static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno); static int buf_flush(struct vnode *vp, int); static int flushbufqueues(struct vnode *, int, int); static void buf_daemon(void); static void bremfreel(struct buf *bp); static __inline void bd_wakeup(void); static int sysctl_runningspace(SYSCTL_HANDLER_ARGS); #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) static int sysctl_bufspace(SYSCTL_HANDLER_ARGS); #endif int vmiodirenable = TRUE; SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0, "Use the VM system for directory writes"); long runningbufspace; SYSCTL_LONG(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0, "Amount of presently outstanding async buffer io"); static long bufspace; #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) SYSCTL_PROC(_vfs, OID_AUTO, bufspace, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RD, &bufspace, 0, sysctl_bufspace, "L", "Virtual memory used for buffers"); #else SYSCTL_LONG(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, "Physical memory used for buffers"); #endif static long bufkvaspace; SYSCTL_LONG(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace, 0, "Kernel virtual memory used for buffers"); static long maxbufspace; SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0, "Maximum allowed value of bufspace (including buf_daemon)"); static long bufmallocspace; SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, "Amount of malloced memory for buffers"); static long maxbufmallocspace; SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0, "Maximum amount of malloced memory for buffers"); static long lobufspace; SYSCTL_LONG(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0, "Minimum amount of buffers we want to have"); long hibufspace; SYSCTL_LONG(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0, "Maximum allowed value of bufspace (excluding buf_daemon)"); static int bufreusecnt; SYSCTL_INT(_vfs, OID_AUTO, 
bufreusecnt, CTLFLAG_RW, &bufreusecnt, 0, "Number of times we have reused a buffer"); static int buffreekvacnt; SYSCTL_INT(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt, 0, "Number of times we have freed the KVA space from some buffer"); static int bufdefragcnt; SYSCTL_INT(_vfs, OID_AUTO, bufdefragcnt, CTLFLAG_RW, &bufdefragcnt, 0, "Number of times we have had to repeat buffer allocation to defragment"); static long lorunningspace; SYSCTL_PROC(_vfs, OID_AUTO, lorunningspace, CTLTYPE_LONG | CTLFLAG_MPSAFE | CTLFLAG_RW, &lorunningspace, 0, sysctl_runningspace, "L", "Minimum preferred space used for in-progress I/O"); static long hirunningspace; SYSCTL_PROC(_vfs, OID_AUTO, hirunningspace, CTLTYPE_LONG | CTLFLAG_MPSAFE | CTLFLAG_RW, &hirunningspace, 0, sysctl_runningspace, "L", "Maximum amount of space to use for in-progress I/O"); int dirtybufferflushes; SYSCTL_INT(_vfs, OID_AUTO, dirtybufferflushes, CTLFLAG_RW, &dirtybufferflushes, 0, "Number of bdwrite to bawrite conversions to limit dirty buffers"); int bdwriteskip; SYSCTL_INT(_vfs, OID_AUTO, bdwriteskip, CTLFLAG_RW, &bdwriteskip, 0, "Number of buffers supplied to bdwrite with snapshot deadlock risk"); int altbufferflushes; SYSCTL_INT(_vfs, OID_AUTO, altbufferflushes, CTLFLAG_RW, &altbufferflushes, 0, "Number of fsync flushes to limit dirty buffers"); static int recursiveflushes; SYSCTL_INT(_vfs, OID_AUTO, recursiveflushes, CTLFLAG_RW, &recursiveflushes, 0, "Number of flushes skipped due to being recursive"); static int numdirtybuffers; SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, &numdirtybuffers, 0, "Number of buffers that are dirty (have unwritten changes) at the moment"); static int lodirtybuffers; SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0, "How many buffers we want to have free before bufdaemon can sleep"); static int hidirtybuffers; SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0, "When the number of dirty buffers is considered severe"); int dirtybufthresh; SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RW, &dirtybufthresh, 0, "Number of bdwrite to bawrite conversions to clear dirty buffers"); static int numfreebuffers; SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0, "Number of free buffers"); static int lofreebuffers; SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0, "XXX Unused"); static int hifreebuffers; SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0, "XXX Complicatedly unused"); static int getnewbufcalls; SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RW, &getnewbufcalls, 0, "Number of calls to getnewbuf"); static int getnewbufrestarts; SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0, "Number of times getnewbuf has had to restart a buffer acquisition"); static int mappingrestarts; SYSCTL_INT(_vfs, OID_AUTO, mappingrestarts, CTLFLAG_RW, &mappingrestarts, 0, "Number of times getblk has had to restart a buffer mapping for " "unmapped buffer"); static int flushbufqtarget = 100; SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0, "Amount of work to do in flushbufqueues when helping bufdaemon"); static long notbufdflushes; SYSCTL_LONG(_vfs, OID_AUTO, notbufdflushes, CTLFLAG_RD, &notbufdflushes, 0, "Number of dirty buffer flushes done by the bufdaemon helpers"); static long barrierwrites; SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, &barrierwrites, 0, "Number of barrier writes"); SYSCTL_INT(_vfs, OID_AUTO,
unmapped_buf_allowed, CTLFLAG_RD, &unmapped_buf_allowed, 0, "Permit the use of unmapped i/o"); /* * Lock for the non-dirty bufqueues */ static struct mtx_padalign bqclean; /* * Lock for the dirty queue. */ static struct mtx_padalign bqdirty; /* * This lock synchronizes access to bd_request. */ static struct mtx_padalign bdlock; /* * This lock protects the runningbufreq and synchronizes runningbufwakeup and * waitrunningbufspace(). */ static struct mtx_padalign rbreqlock; /* * Lock that protects needsbuffer and the sleeps/wakeups surrounding it. */ static struct rwlock_padalign nblock; /* * Lock that protects bdirtywait. */ static struct mtx_padalign bdirtylock; /* * Wakeup point for bufdaemon, as well as indicator of whether it is already * active. Set to 1 when the bufdaemon is already "on" the queue, 0 when it * is idling. */ static int bd_request; /* * Request for the buf daemon to write more buffers than is indicated by * lodirtybuffers. This may be necessary to push out excess dependencies or * defragment the address space where a simple count of the number of dirty * buffers is insufficient to characterize the demand for flushing them. */ static int bd_speedupreq; /* * bogus page -- for I/O to/from partially complete buffers. * This is a temporary solution to the problem, but it is not * really that bad. It would be better to split the buffer * for input in the case of buffers partially already in memory, * but the code is intricate enough already. */ vm_page_t bogus_page; /* * Synchronization (sleep/wakeup) variable for active buffer space requests. * Set when wait starts, cleared prior to wakeup(). * Used in runningbufwakeup() and waitrunningbufspace(). */ static int runningbufreq; /* * Synchronization (sleep/wakeup) variable for buffer requests. * Can contain the VFS_BIO_NEED flags defined below; setting/clearing is done * by and/or. * Used in numdirtywakeup(), bufspacewakeup(), bufcountadd(), bwillwrite(), * getnewbuf(), and getblk(). */ static volatile int needsbuffer; /* * Synchronization for bwillwrite() waiters. */ static int bdirtywait; /* * Definitions for the buffer free lists. */ #define BUFFER_QUEUES 4 /* number of free buffer queues */ #define QUEUE_NONE 0 /* on no queue */ #define QUEUE_CLEAN 1 /* non-B_DELWRI buffers */ #define QUEUE_DIRTY 2 /* B_DELWRI buffers */ #define QUEUE_EMPTY 3 /* empty buffer headers */ #define QUEUE_SENTINEL 1024 /* not a queue index, but a sentinel mark */ /* Queues for free buffers with various properties */ static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } }; #ifdef INVARIANTS static int bq_len[BUFFER_QUEUES]; #endif /* * Single global constant for BUF_WMESG, to avoid getting multiple references. * buf_wmesg is referred to from macros.
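
/*
 * The free lists above are plain queue(3) TAILQs selected by b_qindex.
 * The following self-contained sketch shows the same idiom with toy types;
 * xbuf, Q_CLEAN and Q_DIRTY are illustrative names, not the buffer cache's.
 */
#include <stdio.h>
#include <sys/queue.h>

#define NQUEUES	2
#define Q_CLEAN	0
#define Q_DIRTY	1

struct xbuf {
	int	id;
	int	qindex;
	TAILQ_ENTRY(xbuf) b_freelist;
};
static TAILQ_HEAD(bqhead, xbuf) queues[NQUEUES];

int
main(void)
{
	struct xbuf a = { .id = 1 }, b = { .id = 2 }, *bp;
	int i;

	for (i = 0; i < NQUEUES; i++)
		TAILQ_INIT(&queues[i]);
	a.qindex = Q_CLEAN;
	TAILQ_INSERT_TAIL(&queues[Q_CLEAN], &a, b_freelist);
	b.qindex = Q_DIRTY;
	TAILQ_INSERT_TAIL(&queues[Q_DIRTY], &b, b_freelist);
	/* Move `a` to the dirty queue, as binsfree() moves bufs between lists. */
	TAILQ_REMOVE(&queues[a.qindex], &a, b_freelist);
	a.qindex = Q_DIRTY;
	TAILQ_INSERT_TAIL(&queues[a.qindex], &a, b_freelist);
	TAILQ_FOREACH(bp, &queues[Q_DIRTY], b_freelist)
		printf("dirty: buf %d\n", bp->id);
	return (0);
}
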
*/ const char *buf_wmesg = BUF_WMESG; #define VFS_BIO_NEED_ANY 0x01 /* any freeable buffer */ #define VFS_BIO_NEED_FREE 0x04 /* wait for free bufs, hi hysteresis */ #define VFS_BIO_NEED_BUFSPACE 0x08 /* wait for buf space, lo hysteresis */ static int sysctl_runningspace(SYSCTL_HANDLER_ARGS) { long value; int error; value = *(long *)arg1; error = sysctl_handle_long(oidp, &value, 0, req); if (error != 0 || req->newptr == NULL) return (error); mtx_lock(&rbreqlock); if (arg1 == &hirunningspace) { if (value < lorunningspace) error = EINVAL; else hirunningspace = value; } else { KASSERT(arg1 == &lorunningspace, ("%s: unknown arg1", __func__)); if (value > hirunningspace) error = EINVAL; else lorunningspace = value; } mtx_unlock(&rbreqlock); return (error); } #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) static int sysctl_bufspace(SYSCTL_HANDLER_ARGS) { long lvalue; int ivalue; if (sizeof(int) == sizeof(long) || req->oldlen >= sizeof(long)) return (sysctl_handle_long(oidp, arg1, arg2, req)); lvalue = *(long *)arg1; if (lvalue > INT_MAX) /* On overflow, still write out a long to trigger ENOMEM. */ return (sysctl_handle_long(oidp, &lvalue, 0, req)); ivalue = lvalue; return (sysctl_handle_int(oidp, &ivalue, 0, req)); } #endif /* * bqlock: * * Return the appropriate queue lock based on the index. */ static inline struct mtx * bqlock(int qindex) { if (qindex == QUEUE_DIRTY) return (struct mtx *)(&bqdirty); return (struct mtx *)(&bqclean); } /* * bdirtywakeup: * * Wakeup any bwillwrite() waiters. */ static void bdirtywakeup(void) { mtx_lock(&bdirtylock); if (bdirtywait) { bdirtywait = 0; wakeup(&bdirtywait); } mtx_unlock(&bdirtylock); } /* * bdirtysub: * * Decrement the numdirtybuffers count by one and wakeup any * threads blocked in bwillwrite(). */ static void bdirtysub(void) { if (atomic_fetchadd_int(&numdirtybuffers, -1) == (lodirtybuffers + hidirtybuffers) / 2) bdirtywakeup(); } /* * bdirtyadd: * * Increment the numdirtybuffers count by one and wakeup the buf * daemon if needed. */ static void bdirtyadd(void) { /* * Only do the wakeup once as we cross the boundary. The * buf daemon will keep running until the condition clears. */ if (atomic_fetchadd_int(&numdirtybuffers, 1) == (lodirtybuffers + hidirtybuffers) / 2) bd_wakeup(); } /* * bufspacewakeup: * * Called when buffer space is potentially available for recovery. * getnewbuf() will block on this flag when it is unable to free * sufficient buffer space. Buffer space becomes recoverable when * bp's get placed back in the queues. */ static __inline void bufspacewakeup(void) { int need_wakeup, on; /* * If someone is waiting for bufspace, wake them up. Even * though we may not have freed the kva space yet, the waiting * process will be able to now. */ rw_rlock(&nblock); for (;;) { need_wakeup = 0; on = needsbuffer; if ((on & VFS_BIO_NEED_BUFSPACE) == 0) break; need_wakeup = 1; if (atomic_cmpset_rel_int(&needsbuffer, on, on & ~VFS_BIO_NEED_BUFSPACE)) break; } if (need_wakeup) wakeup(__DEVOLATILE(void *, &needsbuffer)); rw_runlock(&nblock); } /* * bufspaceadjust: * * Adjust the reported bufspace for a KVA managed buffer, possibly * waking any waiters. 
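
/*
 * The wakeup helpers above share one idiom: atomically clear a need-flag
 * and only issue the wakeup if this thread was the one that cleared it.
 * A compact C11 rendering of that loop, with atomic_compare_exchange_weak
 * standing in for atomic_cmpset_rel_int (the release ordering is left
 * implicit here; NEED_BUFSPACE is a stand-in for VFS_BIO_NEED_BUFSPACE).
 */
#include <stdatomic.h>

#define NEED_BUFSPACE	0x08

static atomic_uint needflags;

/* Clear `mask`; return 1 when the caller should issue the wakeup. */
static int
clear_need(unsigned int mask)
{
	unsigned int on;

	on = atomic_load(&needflags);
	for (;;) {
		if ((on & mask) == 0)
			return (0);	/* nobody is waiting on this flag */
		if (atomic_compare_exchange_weak(&needflags, &on, on & ~mask))
			return (1);	/* we cleared it; do the wakeup */
		/* CAS failure reloaded `on`; loop and re-test. */
	}
}

int
main(void)
{
	atomic_store(&needflags, NEED_BUFSPACE);
	return (!clear_need(NEED_BUFSPACE));	/* exits 0 */
}
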
*/ static void bufspaceadjust(struct buf *bp, int bufsize) { int diff; KASSERT((bp->b_flags & B_MALLOC) == 0, ("bufspaceadjust: malloc buf %p", bp)); diff = bufsize - bp->b_bufsize; if (diff < 0) { atomic_subtract_long(&bufspace, -diff); bufspacewakeup(); } else atomic_add_long(&bufspace, diff); bp->b_bufsize = bufsize; } /* * bufmallocadjust: * * Adjust the reported bufspace for a malloc managed buffer, possibly * waking any waiters. */ static void bufmallocadjust(struct buf *bp, int bufsize) { int diff; KASSERT((bp->b_flags & B_MALLOC) != 0, ("bufmallocadjust: non-malloc buf %p", bp)); diff = bufsize - bp->b_bufsize; if (diff < 0) { atomic_subtract_long(&bufmallocspace, -diff); bufspacewakeup(); } else atomic_add_long(&bufmallocspace, diff); bp->b_bufsize = bufsize; } /* * runningwakeup: * * Wake up processes that are waiting on asynchronous writes to fall * below lorunningspace. */ static void runningwakeup(void) { mtx_lock(&rbreqlock); if (runningbufreq) { runningbufreq = 0; wakeup(&runningbufreq); } mtx_unlock(&rbreqlock); } /* * runningbufwakeup: * * Decrement the outstanding write count according. */ void runningbufwakeup(struct buf *bp) { long space, bspace; bspace = bp->b_runningbufspace; if (bspace == 0) return; space = atomic_fetchadd_long(&runningbufspace, -bspace); KASSERT(space >= bspace, ("runningbufspace underflow %ld %ld", space, bspace)); bp->b_runningbufspace = 0; /* * Only acquire the lock and wakeup on the transition from exceeding * the threshold to falling below it. */ if (space < lorunningspace) return; if (space - bspace > lorunningspace) return; runningwakeup(); } /* * bufcountadd: * * Called when a buffer has been added to one of the free queues to * account for the buffer and to wakeup anyone waiting for free buffers. * This typically occurs when large amounts of metadata are being handled * by the buffer cache ( else buffer space runs out first, usually ). */ static __inline void bufcountadd(struct buf *bp) { int mask, need_wakeup, old, on; KASSERT((bp->b_flags & B_INFREECNT) == 0, ("buf %p already counted as free", bp)); bp->b_flags |= B_INFREECNT; old = atomic_fetchadd_int(&numfreebuffers, 1); KASSERT(old >= 0 && old < nbuf, ("numfreebuffers climbed to %d", old + 1)); mask = VFS_BIO_NEED_ANY; if (numfreebuffers >= hifreebuffers) mask |= VFS_BIO_NEED_FREE; rw_rlock(&nblock); for (;;) { need_wakeup = 0; on = needsbuffer; if (on == 0) break; need_wakeup = 1; if (atomic_cmpset_rel_int(&needsbuffer, on, on & ~mask)) break; } if (need_wakeup) wakeup(__DEVOLATILE(void *, &needsbuffer)); rw_runlock(&nblock); } /* * bufcountsub: * * Decrement the numfreebuffers count as needed. */ static void bufcountsub(struct buf *bp) { int old; /* * Fixup numfreebuffers count. If the buffer is invalid or not * delayed-write, the buffer was free and we must decrement * numfreebuffers. */ if ((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0) { KASSERT((bp->b_flags & B_INFREECNT) != 0, ("buf %p not counted in numfreebuffers", bp)); bp->b_flags &= ~B_INFREECNT; old = atomic_fetchadd_int(&numfreebuffers, -1); KASSERT(old > 0, ("numfreebuffers dropped to %d", old - 1)); } } /* * waitrunningbufspace() * * runningbufspace is a measure of the amount of I/O currently * running. This routine is used in async-write situations to * prevent creating huge backups of pending writes to a device. * Only asynchronous writes are governed by this function. * * This does NOT turn an async write into a sync write. 
It waits * for earlier writes to complete and generally returns before the * caller's write has reached the device. */ void waitrunningbufspace(void) { mtx_lock(&rbreqlock); while (runningbufspace > hirunningspace) { runningbufreq = 1; msleep(&runningbufreq, &rbreqlock, PVM, "wdrain", 0); } mtx_unlock(&rbreqlock); } /* * vfs_buf_test_cache: * * Called when a buffer is extended. This function clears the B_CACHE * bit if the newly extended portion of the buffer does not contain * valid data. */ static __inline void vfs_buf_test_cache(struct buf *bp, vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, vm_page_t m) { VM_OBJECT_ASSERT_LOCKED(m->object); if (bp->b_flags & B_CACHE) { int base = (foff + off) & PAGE_MASK; if (vm_page_is_valid(m, base, size) == 0) bp->b_flags &= ~B_CACHE; } } /* Wake up the buffer daemon if necessary */ static __inline void bd_wakeup(void) { mtx_lock(&bdlock); if (bd_request == 0) { bd_request = 1; wakeup(&bd_request); } mtx_unlock(&bdlock); } /* * bd_speedup - speedup the buffer cache flushing code */ void bd_speedup(void) { int needwake; mtx_lock(&bdlock); needwake = 0; if (bd_speedupreq == 0 || bd_request == 0) needwake = 1; bd_speedupreq = 1; bd_request = 1; if (needwake) wakeup(&bd_request); mtx_unlock(&bdlock); } #ifndef NSWBUF_MIN #define NSWBUF_MIN 16 #endif #ifdef __i386__ #define TRANSIENT_DENOM 5 #else #define TRANSIENT_DENOM 10 #endif /* * Calculating buffer cache scaling values and reserve space for buffer * headers. This is called during low level kernel initialization and * may be called more then once. We CANNOT write to the memory area * being reserved at this time. */ caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est) { int tuned_nbuf; long maxbuf, maxbuf_sz, buf_sz, biotmap_sz; /* * physmem_est is in pages. Convert it to kilobytes (assumes * PAGE_SIZE is >= 1K) */ physmem_est = physmem_est * (PAGE_SIZE / 1024); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional * buffers to cover 1/10 of our ram over 64MB. When auto-sizing * the buffer cache we limit the eventual kva reservation to * maxbcache bytes. * * factor represents the 1/4 x ram conversion. */ if (nbuf == 0) { int factor = 4 * BKVASIZE / 1024; nbuf = 50; if (physmem_est > 4096) nbuf += min((physmem_est - 4096) / factor, 65536 / factor); if (physmem_est > 65536) nbuf += min((physmem_est - 65536) * 2 / (factor * 5), 32 * 1024 * 1024 / (factor * 5)); if (maxbcache && nbuf > maxbcache / BKVASIZE) nbuf = maxbcache / BKVASIZE; tuned_nbuf = 1; } else tuned_nbuf = 0; /* XXX Avoid unsigned long overflows later on with maxbufspace. */ maxbuf = (LONG_MAX / 3) / BKVASIZE; if (nbuf > maxbuf) { if (!tuned_nbuf) printf("Warning: nbufs lowered from %d to %ld\n", nbuf, maxbuf); nbuf = maxbuf; } /* * Ideal allocation size for the transient bio submap is 10% * of the maximal space buffer map. This roughly corresponds * to the amount of the buffer mapped for typical UFS load. * * Clip the buffer map to reserve space for the transient * BIOs, if its extent is bigger than 90% (80% on i386) of the * maximum buffer map extent on the platform. * * The fall-back to the maxbuf in case of maxbcache unset, * allows to not trim the buffer KVA for the architectures * with ample KVA space. */ if (bio_transient_maxcnt == 0 && unmapped_buf_allowed) { maxbuf_sz = maxbcache != 0 ? 
maxbcache : maxbuf * BKVASIZE; buf_sz = (long)nbuf * BKVASIZE; if (buf_sz < maxbuf_sz / TRANSIENT_DENOM * (TRANSIENT_DENOM - 1)) { /* * There is more KVA than memory. Do not * adjust buffer map size, and assign the rest * of maxbuf to transient map. */ biotmap_sz = maxbuf_sz - buf_sz; } else { /* * Buffer map spans all KVA we could afford on * this platform. Give 10% (20% on i386) of * the buffer map to the transient bio map. */ biotmap_sz = buf_sz / TRANSIENT_DENOM; buf_sz -= biotmap_sz; } if (biotmap_sz / INT_MAX > MAXPHYS) bio_transient_maxcnt = INT_MAX; else bio_transient_maxcnt = biotmap_sz / MAXPHYS; /* * Artifically limit to 1024 simultaneous in-flight I/Os * using the transient mapping. */ if (bio_transient_maxcnt > 1024) bio_transient_maxcnt = 1024; if (tuned_nbuf) nbuf = buf_sz / BKVASIZE; } /* * swbufs are used as temporary holders for I/O, such as paging I/O. * We have no less then 16 and no more then 256. */ nswbuf = min(nbuf / 4, 256); TUNABLE_INT_FETCH("kern.nswbuf", &nswbuf); if (nswbuf < NSWBUF_MIN) nswbuf = NSWBUF_MIN; /* * Reserve space for the buffer cache buffers */ swbuf = (void *)v; v = (caddr_t)(swbuf + nswbuf); buf = (void *)v; v = (caddr_t)(buf + nbuf); return(v); } /* Initialize the buffer subsystem. Called before use of any buffers. */ void bufinit(void) { struct buf *bp; int i; CTASSERT(MAXBCACHEBUF >= MAXBSIZE); mtx_init(&bqclean, "bufq clean lock", NULL, MTX_DEF); mtx_init(&bqdirty, "bufq dirty lock", NULL, MTX_DEF); mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF); rw_init(&nblock, "needsbuffer lock"); mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF); mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF); /* next, make a null set of free lists */ for (i = 0; i < BUFFER_QUEUES; i++) TAILQ_INIT(&bufqueues[i]); unmapped_buf = (caddr_t)kva_alloc(MAXPHYS); /* finally, initialize each buffer header and stick on empty q */ for (i = 0; i < nbuf; i++) { bp = &buf[i]; bzero(bp, sizeof *bp); bp->b_flags = B_INVAL | B_INFREECNT; bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; bp->b_qindex = QUEUE_EMPTY; bp->b_xflags = 0; bp->b_data = bp->b_kvabase = unmapped_buf; LIST_INIT(&bp->b_dep); BUF_LOCKINIT(bp); TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); #ifdef INVARIANTS bq_len[QUEUE_EMPTY]++; #endif } /* * maxbufspace is the absolute maximum amount of buffer space we are * allowed to reserve in KVM and in real terms. The absolute maximum * is nominally used by buf_daemon. hibufspace is the nominal maximum * used by most other processes. The differential is required to * ensure that buf_daemon is able to run when other processes might * be blocked waiting for buffer space. * * maxbufspace is based on BKVASIZE. Allocating buffers larger then * this may result in KVM fragmentation which is not handled optimally * by the system. */ maxbufspace = (long)nbuf * BKVASIZE; hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10); lobufspace = hibufspace - MAXBCACHEBUF; /* * Note: The 16 MiB upper limit for hirunningspace was chosen * arbitrarily and may need further tuning. It corresponds to * 128 outstanding write IO requests (if IO size is 128 KiB), * which fits with many RAID controllers' tagged queuing limits. * The lower 1 MiB limit is the historical upper limit for * hirunningspace. 
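
/*
 * To make the hirunningspace/lorunningspace watermark arithmetic in the
 * computation that follows concrete, here is the same formula as a
 * standalone program. The MAXBCACHEBUF and hibufspace values below are
 * example inputs for illustration, not values a real kernel would derive.
 */
#include <stdio.h>

#define ROUNDUP(x, y)	((((x) + ((y) - 1)) / (y)) * (y))
#define LMAX(a, b)	((a) > (b) ? (a) : (b))
#define LMIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	long maxbcachebuf = 64L * 1024;		/* assumed MAXBCACHEBUF */
	long hibufspace = 200L * 1024 * 1024;	/* example value */
	long hi, lo;

	/* Clamp hibufspace/64 between 1 MiB and 16 MiB, rounded to bufs. */
	hi = LMAX(LMIN(ROUNDUP(hibufspace / 64, maxbcachebuf),
	    16L * 1024 * 1024), 1024L * 1024);
	/* Wake writers once in-flight I/O falls to 2/3 of the maximum. */
	lo = ROUNDUP((hi * 2) / 3, maxbcachebuf);
	printf("hirunningspace=%ld lorunningspace=%ld\n", hi, lo);
	return (0);
}
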
*/ hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF), 16 * 1024 * 1024), 1024 * 1024); lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF); /* * Limit the amount of malloc memory since it is wired permanently into * the kernel space. Even though this is accounted for in the buffer * allocation, we don't want the malloced region to grow uncontrolled. * The malloc scheme improves memory utilization significantly on average * (small) directories. */ maxbufmallocspace = hibufspace / 20; /* * Reduce the chance of a deadlock occuring by limiting the number * of delayed-write dirty buffers we allow to stack up. */ hidirtybuffers = nbuf / 4 + 20; dirtybufthresh = hidirtybuffers * 9 / 10; numdirtybuffers = 0; /* * To support extreme low-memory systems, make sure hidirtybuffers cannot * eat up all available buffer space. This occurs when our minimum cannot * be met. We try to size hidirtybuffers to 3/4 our buffer space assuming * BKVASIZE'd buffers. */ while ((long)hidirtybuffers * BKVASIZE > 3 * hibufspace / 4) { hidirtybuffers >>= 1; } lodirtybuffers = hidirtybuffers / 2; /* * Try to keep the number of free buffers in the specified range, * and give special processes (e.g. like buf_daemon) access to an * emergency reserve. */ lofreebuffers = nbuf / 18 + 5; hifreebuffers = 2 * lofreebuffers; numfreebuffers = nbuf; bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED); } #ifdef INVARIANTS static inline void vfs_buf_check_mapped(struct buf *bp) { KASSERT(bp->b_kvabase != unmapped_buf, ("mapped buf: b_kvabase was not updated %p", bp)); KASSERT(bp->b_data != unmapped_buf, ("mapped buf: b_data was not updated %p", bp)); KASSERT(bp->b_data < unmapped_buf || bp->b_data >= unmapped_buf + MAXPHYS, ("b_data + b_offset unmapped %p", bp)); } static inline void vfs_buf_check_unmapped(struct buf *bp) { KASSERT(bp->b_data == unmapped_buf, ("unmapped buf: corrupted b_data %p", bp)); } #define BUF_CHECK_MAPPED(bp) vfs_buf_check_mapped(bp) #define BUF_CHECK_UNMAPPED(bp) vfs_buf_check_unmapped(bp) #else #define BUF_CHECK_MAPPED(bp) do {} while (0) #define BUF_CHECK_UNMAPPED(bp) do {} while (0) #endif static int isbufbusy(struct buf *bp) { if (((bp->b_flags & (B_INVAL | B_PERSISTENT)) == 0 && BUF_ISLOCKED(bp)) || ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI)) return (1); return (0); } /* * Shutdown the system cleanly to prepare for reboot, halt, or power off. */ void bufshutdown(int show_busybufs) { static int first_buf_printf = 1; struct buf *bp; int iter, nbusy, pbusy; #ifndef PREEMPTION int subiter; #endif /* * Sync filesystems for shutdown */ wdog_kern_pat(WD_LASTVAL); sys_sync(curthread, NULL); /* * With soft updates, some buffers that are * written will be remarked as dirty until other * buffers are written. */ for (iter = pbusy = 0; iter < 20; iter++) { nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) if (isbufbusy(bp)) nbusy++; if (nbusy == 0) { if (first_buf_printf) printf("All buffers synced."); break; } if (first_buf_printf) { printf("Syncing disks, buffers remaining... "); first_buf_printf = 0; } printf("%d ", nbusy); if (nbusy < pbusy) iter = 0; pbusy = nbusy; wdog_kern_pat(WD_LASTVAL); sys_sync(curthread, NULL); #ifdef PREEMPTION /* * Drop Giant and spin for a while to allow * interrupt threads to run. */ DROP_GIANT(); DELAY(50000 * iter); PICKUP_GIANT(); #else /* * Drop Giant and context switch several times to * allow interrupt threads to run. 
*/ DROP_GIANT(); for (subiter = 0; subiter < 50 * iter; subiter++) { thread_lock(curthread); mi_switch(SW_VOL, NULL); thread_unlock(curthread); DELAY(1000); } PICKUP_GIANT(); #endif } printf("\n"); /* * Count only busy local buffers to prevent forcing * a fsck if we're just a client of a wedged NFS server */ nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) { if (isbufbusy(bp)) { #if 0 /* XXX: This is bogus. We should probably have a BO_REMOTE flag instead */ if (bp->b_dev == NULL) { TAILQ_REMOVE(&mountlist, bp->b_vp->v_mount, mnt_list); continue; } #endif nbusy++; if (show_busybufs > 0) { printf( "%d: buf:%p, vnode:%p, flags:%0x, blkno:%jd, lblkno:%jd, buflock:", nbusy, bp, bp->b_vp, bp->b_flags, (intmax_t)bp->b_blkno, (intmax_t)bp->b_lblkno); BUF_LOCKPRINTINFO(bp); if (show_busybufs > 1) vn_printf(bp->b_vp, "vnode content: "); } } } if (nbusy) { /* * Failed to sync all blocks. Indicate this and don't * unmount filesystems (thus forcing an fsck on reboot). */ printf("Giving up on %d buffers\n", nbusy); DELAY(5000000); /* 5 seconds */ } else { if (!first_buf_printf) printf("Final sync complete\n"); /* * Unmount filesystems */ if (panicstr == 0) vfs_unmountall(); } swapoff_all(); DELAY(100000); /* wait for console output to finish */ } static void bpmap_qenter(struct buf *bp) { BUF_CHECK_MAPPED(bp); /* * bp->b_data is relative to bp->b_offset, but * bp->b_offset may be offset into the first page. */ bp->b_data = (caddr_t)trunc_page((vm_offset_t)bp->b_data); pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages); bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | (vm_offset_t)(bp->b_offset & PAGE_MASK)); } /* * binsfree: * * Insert the buffer into the appropriate free list. */ static void binsfree(struct buf *bp, int qindex) { struct mtx *olock, *nlock; BUF_ASSERT_XLOCKED(bp); nlock = bqlock(qindex); /* Handle delayed bremfree() processing. */ if (bp->b_flags & B_REMFREE) { olock = bqlock(bp->b_qindex); mtx_lock(olock); bremfreel(bp); if (olock != nlock) { mtx_unlock(olock); mtx_lock(nlock); } } else mtx_lock(nlock); if (bp->b_qindex != QUEUE_NONE) panic("binsfree: free buffer onto another queue???"); bp->b_qindex = qindex; if (bp->b_flags & B_AGE) TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist); else TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist); #ifdef INVARIANTS bq_len[bp->b_qindex]++; #endif mtx_unlock(nlock); /* * Something we can maybe free or reuse. */ if (bp->b_bufsize && !(bp->b_flags & B_DELWRI)) bufspacewakeup(); if ((bp->b_flags & B_INVAL) || !(bp->b_flags & B_DELWRI)) bufcountadd(bp); } /* * bremfree: * * Mark the buffer for removal from the appropriate free list. * */ void bremfree(struct buf *bp) { CTR3(KTR_BUF, "bremfree(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT((bp->b_flags & B_REMFREE) == 0, ("bremfree: buffer %p already marked for delayed removal.", bp)); KASSERT(bp->b_qindex != QUEUE_NONE, ("bremfree: buffer %p not on a queue.", bp)); BUF_ASSERT_XLOCKED(bp); bp->b_flags |= B_REMFREE; bufcountsub(bp); } /* * bremfreef: * * Force an immediate removal from a free list. Used only in nfs when * it abuses the b_freelist pointer. */ void bremfreef(struct buf *bp) { struct mtx *qlock; qlock = bqlock(bp->b_qindex); mtx_lock(qlock); bremfreel(bp); mtx_unlock(qlock); } /* * bremfreel: * * Removes a buffer from the free list, must be called with the * correct qlock held. 
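
/*
 * binsfree() above must juggle two queue locks when a delayed bremfree()
 * left the buffer accounted to a different list. A pthreads sketch of that
 * hand-over-hand step, assuming hypothetical names (move_item, iq, qmtx);
 * each lock is held only while its own list is touched, as with the
 * olock/nlock pair in binsfree().
 */
#include <pthread.h>
#include <sys/queue.h>

struct item {
	int	qindex;
	TAILQ_ENTRY(item) link;
};
static TAILQ_HEAD(ihead, item) iq[2] = {
	TAILQ_HEAD_INITIALIZER(iq[0]),
	TAILQ_HEAD_INITIALIZER(iq[1]),
};
static pthread_mutex_t qmtx[2] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

static void
move_item(struct item *it, int newq)
{
	int oldq = it->qindex;

	pthread_mutex_lock(&qmtx[oldq]);
	TAILQ_REMOVE(&iq[oldq], it, link);
	if (oldq != newq) {
		/* Swap locks before touching the destination list. */
		pthread_mutex_unlock(&qmtx[oldq]);
		pthread_mutex_lock(&qmtx[newq]);
	}
	it->qindex = newq;
	TAILQ_INSERT_TAIL(&iq[newq], it, link);
	pthread_mutex_unlock(&qmtx[newq]);
}

int
main(void)
{
	struct item a = { .qindex = 0 };

	TAILQ_INSERT_TAIL(&iq[0], &a, link);
	move_item(&a, 1);
	return (TAILQ_FIRST(&iq[1]) != &a);	/* exits 0 */
}
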
*/ static void bremfreel(struct buf *bp) { CTR3(KTR_BUF, "bremfreel(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_qindex != QUEUE_NONE, ("bremfreel: buffer %p not on a queue.", bp)); BUF_ASSERT_XLOCKED(bp); mtx_assert(bqlock(bp->b_qindex), MA_OWNED); TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); #ifdef INVARIANTS KASSERT(bq_len[bp->b_qindex] >= 1, ("queue %d underflow", bp->b_qindex)); bq_len[bp->b_qindex]--; #endif bp->b_qindex = QUEUE_NONE; /* * If this was a delayed bremfree() we only need to remove the buffer * from the queue and return the stats are already done. */ if (bp->b_flags & B_REMFREE) { bp->b_flags &= ~B_REMFREE; return; } bufcountsub(bp); } /* * bufkvafree: * * Free the kva allocation for a buffer. * */ static void bufkvafree(struct buf *bp) { #ifdef INVARIANTS if (bp->b_kvasize == 0) { KASSERT(bp->b_kvabase == unmapped_buf && bp->b_data == unmapped_buf, ("Leaked KVA space on %p", bp)); } else if (buf_mapped(bp)) BUF_CHECK_MAPPED(bp); else BUF_CHECK_UNMAPPED(bp); #endif if (bp->b_kvasize == 0) return; vmem_free(buffer_arena, (vm_offset_t)bp->b_kvabase, bp->b_kvasize); atomic_subtract_long(&bufkvaspace, bp->b_kvasize); atomic_add_int(&buffreekvacnt, 1); bp->b_data = bp->b_kvabase = unmapped_buf; bp->b_kvasize = 0; } /* * bufkvaalloc: * * Allocate the buffer KVA and set b_kvasize and b_kvabase. */ static int bufkvaalloc(struct buf *bp, int maxsize, int gbflags) { vm_offset_t addr; int error; KASSERT((gbflags & GB_UNMAPPED) == 0 || (gbflags & GB_KVAALLOC) != 0, ("Invalid gbflags 0x%x in %s", gbflags, __func__)); bufkvafree(bp); addr = 0; error = vmem_alloc(buffer_arena, maxsize, M_BESTFIT | M_NOWAIT, &addr); if (error != 0) { /* * Buffer map is too fragmented. Request the caller * to defragment the map. */ atomic_add_int(&bufdefragcnt, 1); return (error); } bp->b_kvabase = (caddr_t)addr; bp->b_kvasize = maxsize; atomic_add_long(&bufkvaspace, bp->b_kvasize); if ((gbflags & GB_UNMAPPED) != 0) { bp->b_data = unmapped_buf; BUF_CHECK_UNMAPPED(bp); } else { bp->b_data = bp->b_kvabase; BUF_CHECK_MAPPED(bp); } return (0); } /* * Attempt to initiate asynchronous I/O on read-ahead blocks. We must * clear BIO_ERROR and B_INVAL prior to initiating I/O . If B_CACHE is set, * the buffer is valid and we do not have to do anything. */ void breada(struct vnode * vp, daddr_t * rablkno, int * rabsize, int cnt, struct ucred * cred) { struct buf *rabp; int i; for (i = 0; i < cnt; i++, rablkno++, rabsize++) { if (inmem(vp, *rablkno)) continue; rabp = getblk(vp, *rablkno, *rabsize, 0, 0, 0); if ((rabp->b_flags & B_CACHE) == 0) { if (!TD_IS_IDLETHREAD(curthread)) curthread->td_ru.ru_inblock++; rabp->b_flags |= B_ASYNC; rabp->b_flags &= ~B_INVAL; rabp->b_ioflags &= ~BIO_ERROR; rabp->b_iocmd = BIO_READ; if (rabp->b_rcred == NOCRED && cred != NOCRED) rabp->b_rcred = crhold(cred); vfs_busy_pages(rabp, 0); BUF_KERNPROC(rabp); rabp->b_iooffset = dbtob(rabp->b_blkno); bstrategy(rabp); } else { brelse(rabp); } } } /* * Entry point for bread() and breadn() via #defines in sys/buf.h. * * Get a buffer with the specified data. Look in the cache first. We * must clear BIO_ERROR and B_INVAL prior to initiating I/O. If B_CACHE * is set, the buffer is valid and we do not have to do anything, see * getblk(). Also starts asynchronous I/O on read-ahead blocks. 
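
/*
 * breada() fires off asynchronous reads for predicted blocks so that the
 * later bread() finds them already cached. A userspace cousin of the same
 * idea, using posix_fadvise(2) to request read-ahead; readahead_blocks is
 * a hypothetical helper, and the block size in main() is an example value.
 */
#include <fcntl.h>
#include <unistd.h>

static void
readahead_blocks(int fd, off_t blkno, int cnt, int blksize)
{
	int i;

	for (i = 1; i <= cnt; i++) {
		off_t off = (blkno + i) * (off_t)blksize;
		/* POSIX_FADV_WILLNEED asks the kernel to start reading this
		 * range in the background; an error just ends the hinting. */
		if (posix_fadvise(fd, off, blksize, POSIX_FADV_WILLNEED) != 0)
			break;
	}
}

int
main(int argc, char **argv)
{
	int fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return (1);
	readahead_blocks(fd, 0, 8, 65536);	/* warm blocks 1..8 */
	close(fd);
	return (0);
}
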
*/ int breadn_flags(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablkno, int *rabsize, int cnt, struct ucred *cred, int flags, struct buf **bpp) { struct buf *bp; int rv = 0, readwait = 0; CTR3(KTR_BUF, "breadn(%p, %jd, %d)", vp, blkno, size); /* * Can only return NULL if GB_LOCK_NOWAIT flag is specified. */ *bpp = bp = getblk(vp, blkno, size, 0, 0, flags); if (bp == NULL) return (EBUSY); /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { if (!TD_IS_IDLETHREAD(curthread)) curthread->td_ru.ru_inblock++; bp->b_iocmd = BIO_READ; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; if (bp->b_rcred == NOCRED && cred != NOCRED) bp->b_rcred = crhold(cred); vfs_busy_pages(bp, 0); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); ++readwait; } breada(vp, rablkno, rabsize, cnt, cred); if (readwait) { rv = bufwait(bp); } return (rv); } /* * Write, release buffer on completion. (Done by iodone * if async). Do not bother writing anything if the buffer * is invalid. * * Note that we set B_CACHE here, indicating that buffer is * fully valid and thus cacheable. This is true even of NFS * now so we set it generally. This could be set either here * or in biodone() since the I/O is synchronous. We put it * here. */ int bufwrite(struct buf *bp) { int oldflags; struct vnode *vp; long space; int vp_md; CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); if ((bp->b_bufobj->bo_flag & BO_DEAD) != 0) { bp->b_flags |= B_INVAL | B_RELBUF; bp->b_flags &= ~B_CACHE; brelse(bp); return (ENXIO); } if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } if (bp->b_flags & B_BARRIER) barrierwrites++; oldflags = bp->b_flags; BUF_ASSERT_HELD(bp); if (bp->b_pin_count > 0) bunpin_wait(bp); KASSERT(!(bp->b_vflags & BV_BKGRDINPROG), ("FFS background buffer should not get here %p", bp)); vp = bp->b_vp; if (vp) vp_md = vp->v_vflag & VV_MD; else vp_md = 0; /* * Mark the buffer clean. Increment the bufobj write count * before bundirty() call, to prevent other thread from seeing * empty dirty list and zero counter for writes in progress, * falsely indicating that the bufobj is clean. */ bufobj_wref(bp->b_bufobj); bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_flags |= B_CACHE; bp->b_iocmd = BIO_WRITE; vfs_busy_pages(bp, 1); /* * Normal bwrites pipeline writes */ bp->b_runningbufspace = bp->b_bufsize; space = atomic_fetchadd_long(&runningbufspace, bp->b_runningbufspace); if (!TD_IS_IDLETHREAD(curthread)) curthread->td_ru.ru_oublock++; if (oldflags & B_ASYNC) BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); if ((oldflags & B_ASYNC) == 0) { int rtval = bufwait(bp); brelse(bp); return (rtval); } else if (space > hirunningspace) { /* * don't allow the async write to saturate the I/O * system. We will not deadlock here because * we are blocking waiting for I/O that is already in-progress * to complete. We do not block here if it is the update * or syncer daemon trying to clean up as that can lead * to deadlock. */ if ((curthread->td_pflags & TDP_NORUNNINGBUF) == 0 && !vp_md) waitrunningbufspace(); } return (0); } void bufbdflush(struct bufobj *bo, struct buf *bp) { struct buf *nbp; if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10) { (void) VOP_FSYNC(bp->b_vp, MNT_NOWAIT, curthread); altbufferflushes++; } else if (bo->bo_dirty.bv_cnt > dirtybufthresh) { BO_LOCK(bo); /* * Try to find a buffer to flush. 
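
/*
 * bufwrite() above throttles asynchronous writers through
 * waitrunningbufspace(), with hysteresis: writers sleep while in-flight
 * space exceeds hirunningspace and are woken once it drops below
 * lorunningspace. A pthreads rendering of that sleep/wakeup pair, where a
 * condition variable stands in for msleep()/wakeup() and the watermark
 * values are examples only.
 */
#include <pthread.h>

static pthread_mutex_t rblock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t rbcv = PTHREAD_COND_INITIALIZER;
static long runningspace;
static long hirunning = 16L * 1024 * 1024;
static long lorunning = 8L * 1024 * 1024;

static void
wait_running_bufspace(void)
{
	pthread_mutex_lock(&rblock);
	while (runningspace > hirunning)
		pthread_cond_wait(&rbcv, &rblock);	/* cf. msleep() */
	pthread_mutex_unlock(&rblock);
}

static void
running_buf_done(long bspace)
{
	pthread_mutex_lock(&rblock);
	runningspace -= bspace;
	if (runningspace < lorunning)	/* fell below the low watermark */
		pthread_cond_broadcast(&rbcv);	/* cf. wakeup() */
	pthread_mutex_unlock(&rblock);
}

int
main(void)
{
	wait_running_bufspace();	/* returns at once: nothing in flight */
	running_buf_done(0);
	return (0);
}
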
*/ TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) { if ((nbp->b_vflags & BV_BKGRDINPROG) || BUF_LOCK(nbp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) continue; if (bp == nbp) panic("bdwrite: found ourselves"); BO_UNLOCK(bo); /* Don't countdeps with the bo lock held. */ if (buf_countdeps(nbp, 0)) { BO_LOCK(bo); BUF_UNLOCK(nbp); continue; } if (nbp->b_flags & B_CLUSTEROK) { vfs_bio_awrite(nbp); } else { bremfree(nbp); bawrite(nbp); } dirtybufferflushes++; break; } if (nbp == NULL) BO_UNLOCK(bo); } } /* * Delayed write. (Buffer is marked dirty). Do not bother writing * anything if the buffer is marked invalid. * * Note that since the buffer must be completely valid, we can safely * set B_CACHE. In fact, we have to set B_CACHE here rather then in * biodone() in order to prevent getblk from writing the buffer * out synchronously. */ void bdwrite(struct buf *bp) { struct thread *td = curthread; struct vnode *vp; struct bufobj *bo; CTR3(KTR_BUF, "bdwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); KASSERT((bp->b_flags & B_BARRIER) == 0, ("Barrier request in delayed write %p", bp)); BUF_ASSERT_HELD(bp); if (bp->b_flags & B_INVAL) { brelse(bp); return; } /* * If we have too many dirty buffers, don't create any more. * If we are wildly over our limit, then force a complete * cleanup. Otherwise, just keep the situation from getting * out of control. Note that we have to avoid a recursive * disaster and not try to clean up after our own cleanup! */ vp = bp->b_vp; bo = bp->b_bufobj; if ((td->td_pflags & (TDP_COWINPROGRESS|TDP_INBDFLUSH)) == 0) { td->td_pflags |= TDP_INBDFLUSH; BO_BDFLUSH(bo, bp); td->td_pflags &= ~TDP_INBDFLUSH; } else recursiveflushes++; bdirty(bp); /* * Set B_CACHE, indicating that the buffer is fully valid. This is * true even of NFS now. */ bp->b_flags |= B_CACHE; /* * This bmap keeps the system from needing to do the bmap later, * perhaps when the system is attempting to do a sync. Since it * is likely that the indirect block -- or whatever other datastructure * that the filesystem needs is still in memory now, it is a good * thing to do this. Note also, that if the pageout daemon is * requesting a sync -- there might not be enough memory to do * the bmap then... So, this is important to do. */ if (vp->v_type != VCHR && bp->b_lblkno == bp->b_blkno) { VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } /* * Set the *dirty* buffer range based upon the VM system dirty * pages. * * Mark the buffer pages as clean. We need to do this here to * satisfy the vnode_pager and the pageout daemon, so that it * thinks that the pages have been "cleaned". Note that since * the pages are in a delayed write buffer -- the VFS layer * "will" see that the pages get written out on the next sync, * or perhaps the cluster will be completed. */ vfs_clean_pages_dirty_buf(bp); bqrelse(bp); /* * note: we cannot initiate I/O from a bdwrite even if we wanted to, * due to the softdep code. */ } /* * bdirty: * * Turn buffer into delayed write request. We must clear BIO_READ and * B_RELBUF, and we must set B_DELWRI. We reassign the buffer to * itself to properly update it in the dirty/clean lists. We mark it * B_DONE to ensure that any asynchronization of the buffer properly * clears B_DONE ( else a panic will occur later ). * * bdirty() is kinda like bdwrite() - we have to clear B_INVAL which * might have been set pre-getblk(). Unlike bwrite/bdwrite, bdirty() * should only be called if the buffer is known-good. 
* * Since the buffer is not on a queue, we do not update the numfreebuffers * count. * * The buffer must be on QUEUE_NONE. */ void bdirty(struct buf *bp) { CTR3(KTR_BUF, "bdirty(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE, ("bdirty: buffer %p still on queue %d", bp, bp->b_qindex)); BUF_ASSERT_HELD(bp); bp->b_flags &= ~(B_RELBUF); bp->b_iocmd = BIO_WRITE; if ((bp->b_flags & B_DELWRI) == 0) { bp->b_flags |= /* XXX B_DONE | */ B_DELWRI; reassignbuf(bp); bdirtyadd(); } } /* * bundirty: * * Clear B_DELWRI for buffer. * * Since the buffer is not on a queue, we do not update the numfreebuffers * count. * * The buffer must be on QUEUE_NONE. */ void bundirty(struct buf *bp) { CTR3(KTR_BUF, "bundirty(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE, ("bundirty: buffer %p still on queue %d", bp, bp->b_qindex)); BUF_ASSERT_HELD(bp); if (bp->b_flags & B_DELWRI) { bp->b_flags &= ~B_DELWRI; reassignbuf(bp); bdirtysub(); } /* * Since it is now being written, we can clear its deferred write flag. */ bp->b_flags &= ~B_DEFERRED; } /* * bawrite: * * Asynchronous write. Start output on a buffer, but do not wait for * it to complete. The buffer is released when the output completes. * * bwrite() ( or the VOP routine anyway ) is responsible for handling * B_INVAL buffers. Not us. */ void bawrite(struct buf *bp) { bp->b_flags |= B_ASYNC; (void) bwrite(bp); } /* * babarrierwrite: * * Asynchronous barrier write. Start output on a buffer, but do not * wait for it to complete. Place a write barrier after this write so * that this buffer and all buffers written before it are committed to * the disk before any buffers written after this write are committed * to the disk. The buffer is released when the output completes. */ void babarrierwrite(struct buf *bp) { bp->b_flags |= B_ASYNC | B_BARRIER; (void) bwrite(bp); } /* * bbarrierwrite: * * Synchronous barrier write. Start output on a buffer and wait for * it to complete. Place a write barrier after this write so that * this buffer and all buffers written before it are committed to * the disk before any buffers written after this write are committed * to the disk. The buffer is released when the output completes. */ int bbarrierwrite(struct buf *bp) { bp->b_flags |= B_BARRIER; return (bwrite(bp)); } /* * bwillwrite: * * Called prior to the locking of any vnodes when we are expecting to * write. We do not want to starve the buffer cache with too many * dirty buffers so we block here. By blocking prior to the locking * of any vnodes we attempt to avoid the situation where a locked vnode * prevents the various system daemons from flushing related buffers. */ void bwillwrite(void) { if (numdirtybuffers >= hidirtybuffers) { mtx_lock(&bdirtylock); while (numdirtybuffers >= hidirtybuffers) { bdirtywait = 1; msleep(&bdirtywait, &bdirtylock, (PRIBIO + 4), "flswai", 0); } mtx_unlock(&bdirtylock); } } /* * Return true if we have too many dirty buffers. */ int buf_dirty_count_severe(void) { return(numdirtybuffers >= hidirtybuffers); } /* * brelse: * * Release a busy buffer and, if requested, free its resources. The * buffer will be stashed in the appropriate bufqueue[] allowing it * to be accessed later as a cache entity or reused for other purposes. 
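
/*
 * brelse() below implements a small disposition policy: a write that failed
 * with EIO is redirtied and retried, while anything invalid, uncacheable or
 * otherwise erroneous is torn down. A reduced model of just that policy;
 * the flag values, struct xbuf and release_disposition() are illustrative,
 * not the buffer cache's definitions.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define B_INVAL		0x1
#define B_NOCACHE	0x2
#define B_DELWRI	0x4

struct xbuf {
	unsigned	flags;
	int		error;
	bool		write;	/* was this a BIO_WRITE? */
	bool		ioerr;	/* models BIO_ERROR */
};

static const char *
release_disposition(struct xbuf *bp)
{
	if (bp->write && bp->ioerr && bp->error == EIO &&
	    !(bp->flags & B_INVAL)) {
		bp->ioerr = false;	/* clear the error, keep the pages */
		bp->flags |= B_DELWRI;	/* redirty so the write is retried */
		return ("redirtied");
	}
	if ((bp->flags & (B_NOCACHE | B_INVAL)) || bp->ioerr) {
		bp->flags |= B_INVAL;	/* contents are junk; free resources */
		return ("invalidated");
	}
	return ("cached");
}

int
main(void)
{
	struct xbuf bp = { .error = EIO, .write = true, .ioerr = true };

	printf("%s\n", release_disposition(&bp));	/* "redirtied" */
	return (0);
}
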
*/ void brelse(struct buf *bp) { int qindex; CTR3(KTR_BUF, "brelse(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); if (BUF_LOCKRECURSED(bp)) { /* * Do not process, in particular, do not handle the * B_INVAL/B_RELBUF and do not release to free list. */ BUF_UNLOCK(bp); return; } if (bp->b_flags & B_MANAGED) { bqrelse(bp); return; } if ((bp->b_vflags & (BV_BKGRDINPROG | BV_BKGRDERR)) == BV_BKGRDERR) { BO_LOCK(bp->b_bufobj); bp->b_vflags &= ~BV_BKGRDERR; BO_UNLOCK(bp->b_bufobj); bdirty(bp); } if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) && bp->b_error == EIO && !(bp->b_flags & B_INVAL)) { /* * Failed write, redirty. Must clear BIO_ERROR to prevent * pages from being scrapped. If the error is anything * other than an I/O error (EIO), assume that retrying * is futile. */ bp->b_ioflags &= ~BIO_ERROR; bdirty(bp); } else if ((bp->b_flags & (B_NOCACHE | B_INVAL)) || (bp->b_ioflags & BIO_ERROR) || (bp->b_bufsize <= 0)) { /* * Either a failed I/O or we were asked to free or not * cache the buffer. */ bp->b_flags |= B_INVAL; if (!LIST_EMPTY(&bp->b_dep)) buf_deallocate(bp); if (bp->b_flags & B_DELWRI) bdirtysub(); bp->b_flags &= ~(B_DELWRI | B_CACHE); if ((bp->b_flags & B_VMIO) == 0) { if (bp->b_bufsize) allocbuf(bp, 0); if (bp->b_vp) brelvp(bp); } } /* * We must clear B_RELBUF if B_DELWRI is set. If vfs_vmio_release() * is called with B_DELWRI set, the underlying pages may wind up * getting freed causing a previous write (bdwrite()) to get 'lost' * because pages associated with a B_DELWRI bp are marked clean. * * We still allow the B_INVAL case to call vfs_vmio_release(), even * if B_DELWRI is set. */ if (bp->b_flags & B_DELWRI) bp->b_flags &= ~B_RELBUF; /* * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer * constituted, not even NFS buffers now. Two flags effect this. If * B_INVAL, the struct buf is invalidated but the VM object is kept * around ( i.e. so it is trivial to reconstitute the buffer later ). * * If BIO_ERROR or B_NOCACHE is set, pages in the VM object will be * invalidated. BIO_ERROR cannot be set for a failed write unless the * buffer is also B_INVAL because it hits the re-dirtying code above. * * Normally we can do this whether a buffer is B_DELWRI or not. If * the buffer is an NFS buffer, it is tracking piecemeal writes or * the commit state and we cannot afford to lose the buffer. If the * buffer has a background write in progress, we need to keep it * around to prevent it from being reconstituted and starting a second * background write. */ if ((bp->b_flags & B_VMIO) && !(bp->b_vp->v_mount != NULL && (bp->b_vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 && !vn_isdisk(bp->b_vp, NULL) && (bp->b_flags & B_DELWRI)) ) { int i, j, resid; vm_page_t m; off_t foff; vm_pindex_t poff; vm_object_t obj; obj = bp->b_bufobj->bo_object; /* * Get the base offset and length of the buffer. Note that * in the VMIO case if the buffer block size is not * page-aligned then b_data pointer may not be page-aligned. * But our b_pages[] array *IS* page aligned. * * block sizes less then DEV_BSIZE (usually 512) are not * supported due to the page granularity bits (m->valid, * m->dirty, etc...). * * See man buf(9) for more information */ resid = bp->b_bufsize; foff = bp->b_offset; for (i = 0; i < bp->b_npages; i++) { int had_bogus = 0; m = bp->b_pages[i]; /* * If we hit a bogus page, fixup *all* the bogus pages * now. 
*/ if (m == bogus_page) { poff = OFF_TO_IDX(bp->b_offset); had_bogus = 1; VM_OBJECT_RLOCK(obj); for (j = i; j < bp->b_npages; j++) { vm_page_t mtmp; mtmp = bp->b_pages[j]; if (mtmp == bogus_page) { mtmp = vm_page_lookup(obj, poff + j); if (!mtmp) { panic("brelse: page missing\n"); } bp->b_pages[j] = mtmp; } } VM_OBJECT_RUNLOCK(obj); if ((bp->b_flags & B_INVAL) == 0 && buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); pmap_qenter( trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } m = bp->b_pages[i]; } if ((bp->b_flags & B_NOCACHE) || (bp->b_ioflags & BIO_ERROR && bp->b_iocmd == BIO_READ)) { int poffset = foff & PAGE_MASK; int presid = resid > (PAGE_SIZE - poffset) ? (PAGE_SIZE - poffset) : resid; KASSERT(presid >= 0, ("brelse: extra page")); VM_OBJECT_WLOCK(obj); while (vm_page_xbusied(m)) { vm_page_lock(m); VM_OBJECT_WUNLOCK(obj); vm_page_busy_sleep(m, "mbncsh"); VM_OBJECT_WLOCK(obj); } if (pmap_page_wired_mappings(m) == 0) vm_page_set_invalid(m, poffset, presid); VM_OBJECT_WUNLOCK(obj); if (had_bogus) printf("avoided corruption bug in bogus_page/brelse code\n"); } resid -= PAGE_SIZE - (foff & PAGE_MASK); foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; } if (bp->b_flags & (B_INVAL | B_RELBUF)) vfs_vmio_release(bp); } else if (bp->b_flags & B_VMIO) { if (bp->b_flags & (B_INVAL | B_RELBUF)) { vfs_vmio_release(bp); } } else if ((bp->b_flags & (B_INVAL | B_RELBUF)) != 0) { if (bp->b_bufsize != 0) allocbuf(bp, 0); if (bp->b_vp != NULL) brelvp(bp); } /* * If the buffer has junk contents signal it and eventually * clean up B_DELWRI and diassociate the vnode so that gbincore() * doesn't find it. */ if (bp->b_bufsize == 0 || (bp->b_ioflags & BIO_ERROR) != 0 || (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF)) != 0) bp->b_flags |= B_INVAL; if (bp->b_flags & B_INVAL) { if (bp->b_flags & B_DELWRI) bundirty(bp); if (bp->b_vp) brelvp(bp); } /* buffers with no memory */ if (bp->b_bufsize == 0) { bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA); if (bp->b_vflags & BV_BKGRDINPROG) panic("losing buffer 1"); bufkvafree(bp); qindex = QUEUE_EMPTY; bp->b_flags |= B_AGE; /* buffers with junk contents */ } else if (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF) || (bp->b_ioflags & BIO_ERROR)) { bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA); if (bp->b_vflags & BV_BKGRDINPROG) panic("losing buffer 2"); qindex = QUEUE_CLEAN; bp->b_flags |= B_AGE; /* remaining buffers */ } else if (bp->b_flags & B_DELWRI) qindex = QUEUE_DIRTY; else qindex = QUEUE_CLEAN; binsfree(bp, qindex); bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF | B_DIRECT); if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY)) panic("brelse: not dirty"); /* unlock */ BUF_UNLOCK(bp); } /* * Release a buffer back to the appropriate queue but do not try to free * it. The buffer is expected to be used again soon. * * bqrelse() is used by bdwrite() to requeue a delayed write, and used by * biodone() to requeue an async I/O on completion. It is also used when * known good buffers need to be requeued but we think we may need the data * again soon. * * XXX we should be able to leave the B_RELBUF hint set on completion. 
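
/*
 * The revised vfs_vmio_release() in this change unwires each page first and
 * classifies it only when the wire count reaches zero: invalid pages from
 * synchronous buffers are freed outright, B_DIRECT pages are freed when
 * possible, and everything else is deactivated to preserve LRU ordering.
 * A schematic of that policy; struct page, try_to_free() and the enum are
 * illustrative stand-ins, not the VM system's types.
 */
#include <stdbool.h>

struct page {
	int	wire_count;
	bool	valid;
	bool	busy;
};

enum disp { PG_FREED, PG_DEACTIVATED, PG_STILL_WIRED };

/* Crude stand-in for vm_page_try_to_free(): frees only idle pages. */
static bool
try_to_free(struct page *m)
{
	return (!m->busy && m->wire_count == 0);
}

static enum disp
release_page(struct page *m, bool async, bool direct)
{
	if (--m->wire_count > 0)
		return (PG_STILL_WIRED);	/* vm_page_unwire() -> false */
	if (!async && !m->valid && !m->busy)
		return (PG_FREED);		/* cf. vm_page_free() */
	if (direct && try_to_free(m))
		return (PG_FREED);
	return (PG_DEACTIVATED);		/* cf. vm_page_deactivate() */
}

int
main(void)
{
	struct page m = { .wire_count = 1, .valid = false, .busy = false };

	return (release_page(&m, false, false) != PG_FREED);	/* exits 0 */
}
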
*/ void bqrelse(struct buf *bp) { int qindex; CTR3(KTR_BUF, "bqrelse(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); if (BUF_LOCKRECURSED(bp)) { /* do not release to free list */ BUF_UNLOCK(bp); return; } bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); if (bp->b_flags & B_MANAGED) { if (bp->b_flags & B_REMFREE) bremfreef(bp); goto out; } /* buffers with stale but valid contents */ if ((bp->b_flags & B_DELWRI) != 0 || (bp->b_vflags & (BV_BKGRDINPROG | BV_BKGRDERR)) == BV_BKGRDERR) { BO_LOCK(bp->b_bufobj); bp->b_vflags &= ~BV_BKGRDERR; BO_UNLOCK(bp->b_bufobj); qindex = QUEUE_DIRTY; } else { if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY)) panic("bqrelse: not dirty"); qindex = QUEUE_CLEAN; } binsfree(bp, qindex); out: /* unlock */ BUF_UNLOCK(bp); } /* Give pages used by the bp back to the VM system (where possible) */ static void vfs_vmio_release(struct buf *bp) { vm_object_t obj; vm_page_t m; int i; + bool freed; if (buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); pmap_qremove(trunc_page((vm_offset_t)bp->b_data), bp->b_npages); } else BUF_CHECK_UNMAPPED(bp); obj = bp->b_bufobj->bo_object; if (obj != NULL) VM_OBJECT_WLOCK(obj); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; bp->b_pages[i] = NULL; - /* - * In order to keep page LRU ordering consistent, put - * everything on the inactive queue. - */ vm_page_lock(m); - vm_page_unwire(m, PQ_INACTIVE); - - /* - * Might as well free the page if we can and it has - * no valid data. We also free the page if the - * buffer was used for direct I/O - */ - if ((bp->b_flags & B_ASYNC) == 0 && !m->valid) { - if (m->wire_count == 0 && !vm_page_busied(m)) - vm_page_free(m); - } else if (bp->b_flags & B_DIRECT) - vm_page_try_to_free(m); + if (vm_page_unwire(m, PQ_NONE)) { + /* + * Determine if the page should be freed before adding + * it to the inactive queue. + */ + if ((bp->b_flags & B_ASYNC) == 0 && m->valid == 0) { + freed = !vm_page_busied(m); + if (freed) + vm_page_free(m); + } else if ((bp->b_flags & B_DIRECT) != 0) + freed = vm_page_try_to_free(m); + else + freed = false; + if (!freed) { + /* + * In order to maintain LRU page ordering, put + * the page at the tail of the inactive queue. + */ + vm_page_deactivate(m); + } + } vm_page_unlock(m); } if (obj != NULL) VM_OBJECT_WUNLOCK(obj); if (bp->b_bufsize) bufspaceadjust(bp, 0); bp->b_npages = 0; bp->b_flags &= ~B_VMIO; if (bp->b_vp) brelvp(bp); } /* * Check to see if a block at a particular lbn is available for a clustered * write. */ static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno) { struct buf *bpa; int match; match = 0; /* If the buf isn't in core skip it */ if ((bpa = gbincore(&vp->v_bufobj, lblkno)) == NULL) return (0); /* If the buf is busy we don't want to wait for it */ if (BUF_LOCK(bpa, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) return (0); /* Only cluster with valid clusterable delayed write buffers */ if ((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) != (B_DELWRI | B_CLUSTEROK)) goto done; if (bpa->b_bufsize != size) goto done; /* * Check to see if it is in the expected place on disk and that the * block has been mapped. */ if ((bpa->b_blkno != bpa->b_lblkno) && (bpa->b_blkno == blkno)) match = 1; done: BUF_UNLOCK(bpa); return (match); } /* * vfs_bio_awrite: * * Implement clustered async writes for clearing out B_DELWRI buffers. * This is much better then the old way of writing only one buffer at * a time. 
Note that we may not be presented with the buffers in the * correct order, so we search for the cluster in both directions. */ int vfs_bio_awrite(struct buf *bp) { struct bufobj *bo; int i; int j; daddr_t lblkno = bp->b_lblkno; struct vnode *vp = bp->b_vp; int ncl; int nwritten; int size; int maxcl; int gbflags; bo = &vp->v_bufobj; gbflags = (bp->b_data == unmapped_buf) ? GB_UNMAPPED : 0; /* * right now we support clustered writing only to regular files. If * we find a clusterable block we could be in the middle of a cluster * rather then at the beginning. */ if ((vp->v_type == VREG) && (vp->v_mount != 0) && /* Only on nodes that have the size info */ (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) { size = vp->v_mount->mnt_stat.f_iosize; maxcl = MAXPHYS / size; BO_RLOCK(bo); for (i = 1; i < maxcl; i++) if (vfs_bio_clcheck(vp, size, lblkno + i, bp->b_blkno + ((i * size) >> DEV_BSHIFT)) == 0) break; for (j = 1; i + j <= maxcl && j <= lblkno; j++) if (vfs_bio_clcheck(vp, size, lblkno - j, bp->b_blkno - ((j * size) >> DEV_BSHIFT)) == 0) break; BO_RUNLOCK(bo); --j; ncl = i + j; /* * this is a possible cluster write */ if (ncl != 1) { BUF_UNLOCK(bp); nwritten = cluster_wbuild(vp, size, lblkno - j, ncl, gbflags); return (nwritten); } } bremfree(bp); bp->b_flags |= B_ASYNC; /* * default (old) behavior, writing out only one block * * XXX returns b_bufsize instead of b_bcount for nwritten? */ nwritten = bp->b_bufsize; (void) bwrite(bp); return (nwritten); } /* * Ask the bufdaemon for help, or act as bufdaemon itself, when a * locked vnode is supplied. */ static void getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo, int defrag) { struct thread *td; char *waitmsg; int error, fl, flags, norunbuf; mtx_assert(&bqclean, MA_OWNED); if (defrag) { flags = VFS_BIO_NEED_BUFSPACE; waitmsg = "nbufkv"; } else if (bufspace >= hibufspace) { waitmsg = "nbufbs"; flags = VFS_BIO_NEED_BUFSPACE; } else { waitmsg = "newbuf"; flags = VFS_BIO_NEED_ANY; } atomic_set_int(&needsbuffer, flags); mtx_unlock(&bqclean); bd_speedup(); /* heeeelp */ if ((gbflags & GB_NOWAIT_BD) != 0) return; td = curthread; rw_wlock(&nblock); while ((needsbuffer & flags) != 0) { if (vp != NULL && vp->v_type != VCHR && (td->td_pflags & TDP_BUFNEED) == 0) { rw_wunlock(&nblock); /* * getblk() is called with a vnode locked, and * some majority of the dirty buffers may as * well belong to the vnode. Flushing the * buffers there would make a progress that * cannot be achieved by the buf_daemon, that * cannot lock the vnode. */ norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) | (td->td_pflags & TDP_NORUNNINGBUF); /* * Play bufdaemon. The getnewbuf() function * may be called while the thread owns lock * for another dirty buffer for the same * vnode, which makes it impossible to use * VOP_FSYNC() there, due to the buffer lock * recursion. */ td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; fl = buf_flush(vp, flushbufqtarget); td->td_pflags &= norunbuf; rw_wlock(&nblock); if (fl != 0) continue; if ((needsbuffer & flags) == 0) break; } error = rw_sleep(__DEVOLATILE(void *, &needsbuffer), &nblock, (PRIBIO + 4) | slpflag, waitmsg, slptimeo); if (error != 0) break; } rw_wunlock(&nblock); } static void getnewbuf_reuse_bp(struct buf *bp, int qindex) { CTR6(KTR_BUF, "getnewbuf(%p) vp %p flags %X kvasize %d bufsize %d " "queue %d (recycling)", bp, bp->b_vp, bp->b_flags, bp->b_kvasize, bp->b_bufsize, qindex); mtx_assert(&bqclean, MA_NOTOWNED); /* * Note: we no longer distinguish between VMIO and non-VMIO * buffers. 
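 * Either way the buffer is stripped of its pages, credentials and
 * vnode association below and reset to a pristine state.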
*/ KASSERT((bp->b_flags & B_DELWRI) == 0, ("delwri buffer %p found in queue %d", bp, qindex)); if (qindex == QUEUE_CLEAN) { if (bp->b_flags & B_VMIO) { bp->b_flags &= ~B_ASYNC; vfs_vmio_release(bp); } if (bp->b_vp != NULL) brelvp(bp); } /* * Get the rest of the buffer freed up. b_kva* is still valid * after this operation. */ if (bp->b_rcred != NOCRED) { crfree(bp->b_rcred); bp->b_rcred = NOCRED; } if (bp->b_wcred != NOCRED) { crfree(bp->b_wcred); bp->b_wcred = NOCRED; } if (!LIST_EMPTY(&bp->b_dep)) buf_deallocate(bp); if (bp->b_vflags & BV_BKGRDINPROG) panic("losing buffer 3"); KASSERT(bp->b_vp == NULL, ("bp: %p still has vnode %p. qindex: %d", bp, bp->b_vp, qindex)); KASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0, ("bp: %p still on a buffer list. xflags %X", bp, bp->b_xflags)); if (bp->b_bufsize) allocbuf(bp, 0); bp->b_flags = 0; bp->b_ioflags = 0; bp->b_xflags = 0; KASSERT((bp->b_flags & B_INFREECNT) == 0, ("buf %p still counted as free?", bp)); bp->b_vflags = 0; bp->b_vp = NULL; bp->b_blkno = bp->b_lblkno = 0; bp->b_offset = NOOFFSET; bp->b_iodone = 0; bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; bp->b_npages = 0; bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_bufobj = NULL; bp->b_pin_count = 0; bp->b_data = bp->b_kvabase; bp->b_fsprivate1 = NULL; bp->b_fsprivate2 = NULL; bp->b_fsprivate3 = NULL; LIST_INIT(&bp->b_dep); } static struct buf * getnewbuf_scan(int maxsize, int defrag, int unmapped, int metadata) { struct buf *bp, *nbp; int nqindex, qindex, pass; KASSERT(!unmapped || !defrag, ("both unmapped and defrag")); pass = 0; restart: if (pass != 0) atomic_add_int(&getnewbufrestarts, 1); nbp = NULL; mtx_lock(&bqclean); /* * If we're not defragging or low on bufspace attempt to make a new * buf from a header. */ if (defrag == 0 && bufspace + maxsize < hibufspace) { nqindex = QUEUE_EMPTY; nbp = TAILQ_FIRST(&bufqueues[nqindex]); } /* * All available buffers might be clean or we need to start recycling. */ if (nbp == NULL) { nqindex = QUEUE_CLEAN; nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); } /* * Run scan, possibly freeing data and/or kva mappings on the fly * depending. */ while ((bp = nbp) != NULL) { qindex = nqindex; /* * Calculate next bp (we can only use it if we do not * release the bqlock) */ if ((nbp = TAILQ_NEXT(bp, b_freelist)) == NULL) { switch (qindex) { case QUEUE_EMPTY: nqindex = QUEUE_CLEAN; nbp = TAILQ_FIRST(&bufqueues[nqindex]); if (nbp != NULL) break; /* FALLTHROUGH */ case QUEUE_CLEAN: if (metadata && pass == 0) { pass = 1; nqindex = QUEUE_EMPTY; nbp = TAILQ_FIRST(&bufqueues[nqindex]); } /* * nbp is NULL. */ break; } } /* * If we are defragging then we need a buffer with * b_kvasize != 0. This situation occurs when we * have many unmapped bufs. */ if (defrag && bp->b_kvasize == 0) continue; /* * Start freeing the bp. This is somewhat involved. nbp * remains valid only for QUEUE_EMPTY[KVA] bp's. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) continue; /* * BKGRDINPROG can only be set with the buf and bufobj * locks both held. We tolerate a race to clear it here. */ if (bp->b_vflags & BV_BKGRDINPROG) { BUF_UNLOCK(bp); continue; } /* * Requeue the background write buffer with error. */ if ((bp->b_vflags & BV_BKGRDERR) != 0) { bremfreel(bp); mtx_unlock(&bqclean); bqrelse(bp); continue; } KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistent queue %d bp %p", qindex, bp)); bremfreel(bp); mtx_unlock(&bqclean); /* * NOTE: nbp is now entirely invalid. We can only restart * the scan from this point on. 
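 * (The bqclean mutex was dropped just above, so another thread may
 * already have reshaped the queue behind us.)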
*/ getnewbuf_reuse_bp(bp, qindex); mtx_assert(&bqclean, MA_NOTOWNED); /* * If we are defragging then free the buffer. */ if (defrag) { bp->b_flags |= B_INVAL; brelse(bp); defrag = 0; goto restart; } /* * Notify any waiters for the buffer lock about * identity change by freeing the buffer. */ if (qindex == QUEUE_CLEAN && BUF_LOCKWAITERS(bp)) { bp->b_flags |= B_INVAL; brelse(bp); goto restart; } if (metadata) break; /* * If we are overcomitted then recover the buffer and its * KVM space. This occurs in rare situations when multiple * processes are blocked in getnewbuf() or allocbuf(). */ if (bufspace >= hibufspace && bp->b_kvasize != 0) { bp->b_flags |= B_INVAL; brelse(bp); goto restart; } break; } return (bp); } /* * getnewbuf: * * Find and initialize a new buffer header, freeing up existing buffers * in the bufqueues as necessary. The new buffer is returned locked. * * Important: B_INVAL is not set. If the caller wishes to throw the * buffer away, the caller must set B_INVAL prior to calling brelse(). * * We block if: * We have insufficient buffer headers * We have insufficient buffer space * buffer_arena is too fragmented ( space reservation fails ) * If we have to flush dirty buffers ( but we try to avoid this ) */ static struct buf * getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int size, int maxsize, int gbflags) { struct buf *bp; int defrag, metadata; KASSERT((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); if (!unmapped_buf_allowed) gbflags &= ~(GB_UNMAPPED | GB_KVAALLOC); defrag = 0; if (vp == NULL || (vp->v_vflag & (VV_MD | VV_SYSTEM)) != 0 || vp->v_type == VCHR) metadata = 1; else metadata = 0; /* * We can't afford to block since we might be holding a vnode lock, * which may prevent system daemons from running. We deal with * low-memory situations by proactively returning memory and running * async I/O rather then sync I/O. */ atomic_add_int(&getnewbufcalls, 1); restart: bp = getnewbuf_scan(maxsize, defrag, (gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == GB_UNMAPPED, metadata); if (bp != NULL) defrag = 0; /* * If we exhausted our list, sleep as appropriate. We may have to * wakeup various daemons and write out some dirty buffers. * * Generally we are sleeping due to insufficient buffer space. */ if (bp == NULL) { mtx_assert(&bqclean, MA_OWNED); getnewbuf_bufd_help(vp, gbflags, slpflag, slptimeo, defrag); mtx_assert(&bqclean, MA_NOTOWNED); } else if ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == GB_UNMAPPED) { mtx_assert(&bqclean, MA_NOTOWNED); bufkvafree(bp); atomic_add_int(&bufreusecnt, 1); } else { mtx_assert(&bqclean, MA_NOTOWNED); /* * We finally have a valid bp. We aren't quite out of the * woods, we still have to reserve kva space. In order to * keep fragmentation sane we only allocate kva in BKVASIZE * chunks. */ maxsize = (maxsize + BKVAMASK) & ~BKVAMASK; if (maxsize != bp->b_kvasize && bufkvaalloc(bp, maxsize, gbflags)) { defrag = 1; bp->b_flags |= B_INVAL; brelse(bp); goto restart; } else if ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) == (GB_UNMAPPED | GB_KVAALLOC)) { bp->b_data = unmapped_buf; BUF_CHECK_UNMAPPED(bp); } atomic_add_int(&bufreusecnt, 1); } return (bp); } /* * buf_daemon: * * buffer flushing daemon. Buffers are normally flushed by the * update daemon but if it cannot keep up this process starts to * take the load in an attempt to prevent getnewbuf() from blocking. 
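 *
 * The core of the loop below, roughly:
 *
 *	while (numdirtybuffers > lodirty) {
 *		if (buf_flush(NULL, numdirtybuffers - lodirty) == 0)
 *			break;
 *		kern_yield(PRI_USER);
 *	}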
*/ static struct kproc_desc buf_kp = { "bufdaemon", buf_daemon, &bufdaemonproc }; SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp); static int buf_flush(struct vnode *vp, int target) { int flushed; flushed = flushbufqueues(vp, target, 0); if (flushed == 0) { /* * Could not find any buffers without rollback * dependencies, so just write the first one * in the hopes of eventually making progress. */ if (vp != NULL && target > 2) target /= 2; flushbufqueues(vp, target, 1); } return (flushed); } static void buf_daemon() { int lodirty; /* * This process needs to be suspended prior to shutdown sync. */ EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, bufdaemonproc, SHUTDOWN_PRI_LAST); /* * This process is allowed to take the buffer cache to the limit */ curthread->td_pflags |= TDP_NORUNNINGBUF | TDP_BUFNEED; mtx_lock(&bdlock); for (;;) { bd_request = 0; mtx_unlock(&bdlock); kproc_suspend_check(bufdaemonproc); lodirty = lodirtybuffers; if (bd_speedupreq) { lodirty = numdirtybuffers / 2; bd_speedupreq = 0; } /* * Do the flush. Limit the amount of in-transit I/O we * allow to build up, otherwise we would completely saturate * the I/O system. */ while (numdirtybuffers > lodirty) { if (buf_flush(NULL, numdirtybuffers - lodirty) == 0) break; kern_yield(PRI_USER); } /* * Only clear bd_request if we have reached our low water * mark. The buf_daemon normally waits 1 second and * then incrementally flushes any dirty buffers that have * built up, within reason. * * If we were unable to hit our low water mark and couldn't * find any flushable buffers, we sleep for a short period * to avoid endless loops on unlockable buffers. */ mtx_lock(&bdlock); if (numdirtybuffers <= lodirtybuffers) { /* * We reached our low water mark, reset the * request and sleep until we are needed again. * The sleep is just so the suspend code works. */ bd_request = 0; /* * Do an extra wakeup in case dirty threshold * changed via sysctl and the explicit transition * out of shortfall was missed. */ bdirtywakeup(); if (runningbufspace <= lorunningspace) runningwakeup(); msleep(&bd_request, &bdlock, PVM, "psleep", hz); } else { /* * We couldn't find any flushable dirty buffers but * still have too many dirty buffers, we * have to sleep and try again. (rare) */ msleep(&bd_request, &bdlock, PVM, "qsleep", hz / 10); } } } /* * flushbufqueues: * * Try to flush a buffer in the dirty queue. We must be careful to * free up B_INVAL buffers instead of write them, which NFS is * particularly sensitive to. */ static int flushwithdeps = 0; SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps, 0, "Number of buffers flushed with dependecies that require rollbacks"); static int flushbufqueues(struct vnode *lvp, int target, int flushdeps) { struct buf *sentinel; struct vnode *vp; struct mount *mp; struct buf *bp; int hasdeps; int flushed; int queue; int error; bool unlock; flushed = 0; queue = QUEUE_DIRTY; bp = NULL; sentinel = malloc(sizeof(struct buf), M_TEMP, M_WAITOK | M_ZERO); sentinel->b_qindex = QUEUE_SENTINEL; mtx_lock(&bqdirty); TAILQ_INSERT_HEAD(&bufqueues[queue], sentinel, b_freelist); mtx_unlock(&bqdirty); while (flushed != target) { maybe_yield(); mtx_lock(&bqdirty); bp = TAILQ_NEXT(sentinel, b_freelist); if (bp != NULL) { TAILQ_REMOVE(&bufqueues[queue], sentinel, b_freelist); TAILQ_INSERT_AFTER(&bufqueues[queue], bp, sentinel, b_freelist); } else { mtx_unlock(&bqdirty); break; } /* * Skip sentinels inserted by other invocations of the * flushbufqueues(), taking care to not reorder them. 
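 * (Each invocation moves its own sentinel past the buffer it is about
 * to examine, so concurrent scans interleave without losing position.)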
* * Only flush the buffers that belong to the * vnode locked by the curthread. */ if (bp->b_qindex == QUEUE_SENTINEL || (lvp != NULL && bp->b_vp != lvp)) { mtx_unlock(&bqdirty); continue; } error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL); mtx_unlock(&bqdirty); if (error != 0) continue; if (bp->b_pin_count > 0) { BUF_UNLOCK(bp); continue; } /* * BKGRDINPROG can only be set with the buf and bufobj * locks both held. We tolerate a race to clear it here. */ if ((bp->b_vflags & BV_BKGRDINPROG) != 0 || (bp->b_flags & B_DELWRI) == 0) { BUF_UNLOCK(bp); continue; } if (bp->b_flags & B_INVAL) { bremfreef(bp); brelse(bp); flushed++; continue; } if (!LIST_EMPTY(&bp->b_dep) && buf_countdeps(bp, 0)) { if (flushdeps == 0) { BUF_UNLOCK(bp); continue; } hasdeps = 1; } else hasdeps = 0; /* * We must hold the lock on a vnode before writing * one of its buffers. Otherwise we may confuse, or * in the case of a snapshot vnode, deadlock the * system. * * The lock order here is the reverse of the normal * of vnode followed by buf lock. This is ok because * the NOWAIT will prevent deadlock. */ vp = bp->b_vp; if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { BUF_UNLOCK(bp); continue; } if (lvp == NULL) { unlock = true; error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT); } else { ASSERT_VOP_LOCKED(vp, "getbuf"); unlock = false; error = VOP_ISLOCKED(vp) == LK_EXCLUSIVE ? 0 : vn_lock(vp, LK_TRYUPGRADE); } if (error == 0) { CTR3(KTR_BUF, "flushbufqueue(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); if (curproc == bufdaemonproc) { vfs_bio_awrite(bp); } else { bremfree(bp); bwrite(bp); notbufdflushes++; } vn_finished_write(mp); if (unlock) VOP_UNLOCK(vp, 0); flushwithdeps += hasdeps; flushed++; /* * Sleeping on runningbufspace while holding * vnode lock leads to deadlock. */ if (curproc == bufdaemonproc && runningbufspace > hirunningspace) waitrunningbufspace(); continue; } vn_finished_write(mp); BUF_UNLOCK(bp); } mtx_lock(&bqdirty); TAILQ_REMOVE(&bufqueues[queue], sentinel, b_freelist); mtx_unlock(&bqdirty); free(sentinel, M_TEMP); return (flushed); } /* * Check to see if a block is currently memory resident. */ struct buf * incore(struct bufobj *bo, daddr_t blkno) { struct buf *bp; BO_RLOCK(bo); bp = gbincore(bo, blkno); BO_RUNLOCK(bo); return (bp); } /* * Returns true if no I/O is needed to access the * associated VM object. This is like incore except * it also hunts around in the VM system for the data. */ static int inmem(struct vnode * vp, daddr_t blkno) { vm_object_t obj; vm_offset_t toff, tinc, size; vm_page_t m; vm_ooffset_t off; ASSERT_VOP_LOCKED(vp, "inmem"); if (incore(&vp->v_bufobj, blkno)) return 1; if (vp->v_mount == NULL) return 0; obj = vp->v_object; if (obj == NULL) return (0); size = PAGE_SIZE; if (size > vp->v_mount->mnt_stat.f_iosize) size = vp->v_mount->mnt_stat.f_iosize; off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize; VM_OBJECT_RLOCK(obj); for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { m = vm_page_lookup(obj, OFF_TO_IDX(off + toff)); if (!m) goto notinmem; tinc = size; if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK)) tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK); if (vm_page_is_valid(m, (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0) goto notinmem; } VM_OBJECT_RUNLOCK(obj); return 1; notinmem: VM_OBJECT_RUNLOCK(obj); return (0); } /* * Set the dirty range for a buffer based on the status of the dirty * bits in the pages comprising the buffer. The range is limited * to the size of the buffer. 
* * Tell the VM system that the pages associated with this buffer * are clean. This is used for delayed writes where the data is * going to go to disk eventually without additional VM intevention. * * Note that while we only really need to clean through to b_bcount, we * just go ahead and clean through to b_bufsize. */ static void vfs_clean_pages_dirty_buf(struct buf *bp) { vm_ooffset_t foff, noff, eoff; vm_page_t m; int i; if ((bp->b_flags & B_VMIO) == 0 || bp->b_bufsize == 0) return; foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("vfs_clean_pages_dirty_buf: no buffer offset")); VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); vfs_drain_busy_pages(bp); vfs_setdirty_locked_object(bp); for (i = 0; i < bp->b_npages; i++) { noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; eoff = noff; if (eoff > bp->b_offset + bp->b_bufsize) eoff = bp->b_offset + bp->b_bufsize; m = bp->b_pages[i]; vfs_page_set_validclean(bp, foff, m); /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */ foff = noff; } VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); } static void vfs_setdirty_locked_object(struct buf *bp) { vm_object_t object; int i; object = bp->b_bufobj->bo_object; VM_OBJECT_ASSERT_WLOCKED(object); /* * We qualify the scan for modified pages on whether the * object has been flushed yet. */ if ((object->flags & OBJ_MIGHTBEDIRTY) != 0) { vm_offset_t boffset; vm_offset_t eoffset; /* * test the pages to see if they have been modified directly * by users through the VM system. */ for (i = 0; i < bp->b_npages; i++) vm_page_test_dirty(bp->b_pages[i]); /* * Calculate the encompassing dirty range, boffset and eoffset, * (eoffset - boffset) bytes. */ for (i = 0; i < bp->b_npages; i++) { if (bp->b_pages[i]->dirty) break; } boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK); for (i = bp->b_npages - 1; i >= 0; --i) { if (bp->b_pages[i]->dirty) { break; } } eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK); /* * Fit it to the buffer. */ if (eoffset > bp->b_bcount) eoffset = bp->b_bcount; /* * If we have a good dirty range, merge with the existing * dirty range. */ if (boffset < eoffset) { if (bp->b_dirtyoff > boffset) bp->b_dirtyoff = boffset; if (bp->b_dirtyend < eoffset) bp->b_dirtyend = eoffset; } } } /* * Allocate the KVA mapping for an existing buffer. * If an unmapped buffer is provided but a mapped buffer is requested, take * also care to properly setup mappings between pages and KVA. */ static void bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags) { struct buf *scratch_bp; int bsize, maxsize, need_mapping, need_kva; off_t offset; need_mapping = bp->b_data == unmapped_buf && (gbflags & GB_UNMAPPED) == 0; need_kva = bp->b_kvabase == unmapped_buf && bp->b_data == unmapped_buf && (gbflags & GB_KVAALLOC) != 0; if (!need_mapping && !need_kva) return; BUF_CHECK_UNMAPPED(bp); if (need_mapping && bp->b_kvabase != unmapped_buf) { /* * Buffer is not mapped, but the KVA was already * reserved at the time of the instantiation. Use the * allocated space. */ goto has_addr; } /* * Calculate the amount of the address space we would reserve * if the buffer was mapped. */ bsize = vn_isdisk(bp->b_vp, NULL) ? DEV_BSIZE : bp->b_bufobj->bo_bsize; KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize")); offset = blkno * bsize; maxsize = size + (offset & PAGE_MASK); maxsize = imax(maxsize, bsize); mapping_loop: if (bufkvaalloc(bp, maxsize, gbflags)) { /* * Request defragmentation. getnewbuf() returns us the * allocated space by the scratch buffer KVA. 
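 * (The scratch buffer exists only to reserve a KVA range; its mapping
 * is stolen below and the emptied header is discarded via brelse().)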
*/ scratch_bp = getnewbuf(bp->b_vp, 0, 0, size, maxsize, gbflags | (GB_UNMAPPED | GB_KVAALLOC)); if (scratch_bp == NULL) { if ((gbflags & GB_NOWAIT_BD) != 0) { /* * XXXKIB: defragmentation cannot * succeed, not sure what else to do. */ panic("GB_NOWAIT_BD and GB_UNMAPPED %p", bp); } atomic_add_int(&mappingrestarts, 1); goto mapping_loop; } KASSERT(scratch_bp->b_kvabase != unmapped_buf, ("scratch bp has no KVA %p", scratch_bp)); /* Grab pointers. */ bp->b_kvabase = scratch_bp->b_kvabase; bp->b_kvasize = scratch_bp->b_kvasize; bp->b_data = scratch_bp->b_data; /* Get rid of the scratch buffer. */ scratch_bp->b_kvasize = 0; scratch_bp->b_flags |= B_INVAL; scratch_bp->b_data = scratch_bp->b_kvabase = unmapped_buf; brelse(scratch_bp); } has_addr: if (need_mapping) { /* b_offset is handled by bpmap_qenter. */ bp->b_data = bp->b_kvabase; BUF_CHECK_MAPPED(bp); bpmap_qenter(bp); } } /* * getblk: * * Get a block given a specified block and offset into a file/device. * The buffers B_DONE bit will be cleared on return, making it almost * ready for an I/O initiation. B_INVAL may or may not be set on * return. The caller should clear B_INVAL prior to initiating a * READ. * * For a non-VMIO buffer, B_CACHE is set to the opposite of B_INVAL for * an existing buffer. * * For a VMIO buffer, B_CACHE is modified according to the backing VM. * If getblk()ing a previously 0-sized invalid buffer, B_CACHE is set * and then cleared based on the backing VM. If the previous buffer is * non-0-sized but invalid, B_CACHE will be cleared. * * If getblk() must create a new buffer, the new buffer is returned with * both B_INVAL and B_CACHE clear unless it is a VMIO buffer, in which * case it is returned with B_INVAL clear and B_CACHE set based on the * backing VM. * * getblk() also forces a bwrite() for any B_DELWRI buffer whos * B_CACHE bit is clear. * * What this means, basically, is that the caller should use B_CACHE to * determine whether the buffer is fully valid or not and should clear * B_INVAL prior to issuing a read. If the caller intends to validate * the buffer by loading its data area with something, the caller needs * to clear B_INVAL. If the caller does this without issuing an I/O, * the caller should set B_CACHE ( as an optimization ), else the caller * should issue the I/O and biodone() will set B_CACHE if the I/O was * a write attempt or if it was a successfull read. If the caller * intends to issue a READ, the caller must clear B_INVAL and BIO_ERROR * prior to issuing the READ. biodone() will *not* clear B_INVAL. */ struct buf * getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo, int flags) { struct buf *bp; struct bufobj *bo; int bsize, error, maxsize, vmio; off_t offset; CTR3(KTR_BUF, "getblk(%p, %ld, %d)", vp, (long)blkno, size); KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); ASSERT_VOP_LOCKED(vp, "getblk"); if (size > MAXBCACHEBUF) panic("getblk: size(%d) > MAXBCACHEBUF(%d)\n", size, MAXBCACHEBUF); if (!unmapped_buf_allowed) flags &= ~(GB_UNMAPPED | GB_KVAALLOC); bo = &vp->v_bufobj; loop: BO_RLOCK(bo); bp = gbincore(bo, blkno); if (bp != NULL) { int lockflags; /* * Buffer is in-core. If the buffer is not busy nor managed, * it must be on a queue. 
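 * (BO_LOCKPTR(bo) is passed to BUF_TIMELOCK() as an interlock, so the
 * bufobj lock is dropped atomically with sleeping on the buf lock.)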
*/ lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK; if (flags & GB_LOCK_NOWAIT) lockflags |= LK_NOWAIT; error = BUF_TIMELOCK(bp, lockflags, BO_LOCKPTR(bo), "getblk", slpflag, slptimeo); /* * If we slept and got the lock we have to restart in case * the buffer changed identities. */ if (error == ENOLCK) goto loop; /* We timed out or were interrupted. */ else if (error) return (NULL); /* If recursed, assume caller knows the rules. */ else if (BUF_LOCKRECURSED(bp)) goto end; /* * The buffer is locked. B_CACHE is cleared if the buffer is * invalid. Otherwise, for a non-VMIO buffer, B_CACHE is set * and for a VMIO buffer B_CACHE is adjusted according to the * backing VM cache. */ if (bp->b_flags & B_INVAL) bp->b_flags &= ~B_CACHE; else if ((bp->b_flags & (B_VMIO | B_INVAL)) == 0) bp->b_flags |= B_CACHE; if (bp->b_flags & B_MANAGED) MPASS(bp->b_qindex == QUEUE_NONE); else bremfree(bp); /* * check for size inconsistencies for non-VMIO case. */ if (bp->b_bcount != size) { if ((bp->b_flags & B_VMIO) == 0 || (size > bp->b_kvasize)) { if (bp->b_flags & B_DELWRI) { /* * If buffer is pinned and caller does * not want sleep waiting for it to be * unpinned, bail out * */ if (bp->b_pin_count > 0) { if (flags & GB_LOCK_NOWAIT) { bqrelse(bp); return (NULL); } else { bunpin_wait(bp); } } bp->b_flags |= B_NOCACHE; bwrite(bp); } else { if (LIST_EMPTY(&bp->b_dep)) { bp->b_flags |= B_RELBUF; brelse(bp); } else { bp->b_flags |= B_NOCACHE; bwrite(bp); } } goto loop; } } /* * Handle the case of unmapped buffer which should * become mapped, or the buffer for which KVA * reservation is requested. */ bp_unmapped_get_kva(bp, blkno, size, flags); /* * If the size is inconsistant in the VMIO case, we can resize * the buffer. This might lead to B_CACHE getting set or * cleared. If the size has not changed, B_CACHE remains * unchanged from its previous state. */ if (bp->b_bcount != size) allocbuf(bp, size); KASSERT(bp->b_offset != NOOFFSET, ("getblk: no buffer offset")); /* * A buffer with B_DELWRI set and B_CACHE clear must * be committed before we can return the buffer in * order to prevent the caller from issuing a read * ( due to B_CACHE not being set ) and overwriting * it. * * Most callers, including NFS and FFS, need this to * operate properly either because they assume they * can issue a read if B_CACHE is not set, or because * ( for example ) an uncached B_DELWRI might loop due * to softupdates re-dirtying the buffer. In the latter * case, B_CACHE is set after the first write completes, * preventing further loops. * NOTE! b*write() sets B_CACHE. If we cleared B_CACHE * above while extending the buffer, we cannot allow the * buffer to remain with B_CACHE set after the write * completes or it will represent a corrupt state. To * deal with this we set B_NOCACHE to scrap the buffer * after the write. * * We might be able to do something fancy, like setting * B_CACHE in bwrite() except if B_DELWRI is already set, * so the below call doesn't set B_CACHE, but that gets real * confusing. This is much easier. */ if ((bp->b_flags & (B_CACHE|B_DELWRI)) == B_DELWRI) { bp->b_flags |= B_NOCACHE; bwrite(bp); goto loop; } bp->b_flags &= ~B_DONE; } else { /* * Buffer is not in-core, create new buffer. The buffer * returned by getnewbuf() is locked. Note that the returned * buffer is also considered valid (not marked B_INVAL). */ BO_RUNLOCK(bo); /* * If the user does not want us to create the buffer, bail out * here. 
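 * (GB_NOCREAT callers only want the buffer if it is already
 * instantiated; presumably they can recover, or probe again, when it
 * is not.)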
*/ if (flags & GB_NOCREAT) return NULL; if (numfreebuffers == 0 && TD_IS_IDLETHREAD(curthread)) return NULL; bsize = vn_isdisk(vp, NULL) ? DEV_BSIZE : bo->bo_bsize; KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize")); offset = blkno * bsize; vmio = vp->v_object != NULL; if (vmio) { maxsize = size + (offset & PAGE_MASK); } else { maxsize = size; /* Do not allow non-VMIO notmapped buffers. */ flags &= ~(GB_UNMAPPED | GB_KVAALLOC); } maxsize = imax(maxsize, bsize); bp = getnewbuf(vp, slpflag, slptimeo, size, maxsize, flags); if (bp == NULL) { if (slpflag || slptimeo) return NULL; goto loop; } /* * This code is used to make sure that a buffer is not * created while the getnewbuf routine is blocked. * This can be a problem whether the vnode is locked or not. * If the buffer is created out from under us, we have to * throw away the one we just created. * * Note: this must occur before we associate the buffer * with the vp especially considering limitations in * the splay tree implementation when dealing with duplicate * lblkno's. */ BO_LOCK(bo); if (gbincore(bo, blkno)) { BO_UNLOCK(bo); bp->b_flags |= B_INVAL; brelse(bp); goto loop; } /* * Insert the buffer into the hash, so that it can * be found by incore. */ bp->b_blkno = bp->b_lblkno = blkno; bp->b_offset = offset; bgetvp(vp, bp); BO_UNLOCK(bo); /* * set B_VMIO bit. allocbuf() the buffer bigger. Since the * buffer size starts out as 0, B_CACHE will be set by * allocbuf() for the VMIO case prior to it testing the * backing store for validity. */ if (vmio) { bp->b_flags |= B_VMIO; KASSERT(vp->v_object == bp->b_bufobj->bo_object, ("ARGH! different b_bufobj->bo_object %p %p %p\n", bp, vp->v_object, bp->b_bufobj->bo_object)); } else { bp->b_flags &= ~B_VMIO; KASSERT(bp->b_bufobj->bo_object == NULL, ("ARGH! has b_bufobj->bo_object %p %p\n", bp, bp->b_bufobj->bo_object)); BUF_CHECK_MAPPED(bp); } allocbuf(bp, size); bp->b_flags &= ~B_DONE; } CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp); BUF_ASSERT_HELD(bp); end: KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); return (bp); } /* * Get an empty, disassociated buffer of given size. The buffer is initially * set to B_INVAL. */ struct buf * geteblk(int size, int flags) { struct buf *bp; int maxsize; maxsize = (size + BKVAMASK) & ~BKVAMASK; while ((bp = getnewbuf(NULL, 0, 0, size, maxsize, flags)) == NULL) { if ((flags & GB_NOWAIT_BD) && (curthread->td_pflags & TDP_BUFNEED) != 0) return (NULL); } allocbuf(bp, size); bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */ BUF_ASSERT_HELD(bp); return (bp); } /* * This code constitutes the buffer memory from either anonymous system * memory (in the case of non-VMIO operations) or from an associated * VM object (in the case of VMIO operations). This code is able to * resize a buffer up or down. * * Note that this code is tricky, and has many complications to resolve * deadlock or inconsistant data situations. Tread lightly!!! * There are B_CACHE and B_DELWRI interactions that must be dealt with by * the caller. Calling this code willy nilly can result in the loss of data. * * allocbuf() only adjusts B_CACHE for VMIO buffers. getblk() deals with * B_CACHE for the non-VMIO case. */ int allocbuf(struct buf *bp, int size) { int newbsize, mbsize; int i; BUF_ASSERT_HELD(bp); if (bp->b_kvasize != 0 && bp->b_kvasize < size) panic("allocbuf: buffer too small"); if ((bp->b_flags & B_VMIO) == 0) { caddr_t origbuf; int origbufsize; /* * Just get anonymous memory from the kernel. 
Don't * mess with B_CACHE. */ mbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); if (bp->b_flags & B_MALLOC) newbsize = mbsize; else newbsize = round_page(size); if (newbsize < bp->b_bufsize) { /* * malloced buffers are not shrunk */ if (bp->b_flags & B_MALLOC) { if (newbsize) { bp->b_bcount = size; } else { free(bp->b_data, M_BIOBUF); bufmallocadjust(bp, 0); bp->b_data = bp->b_kvabase; bp->b_bcount = 0; bp->b_flags &= ~B_MALLOC; } return 1; } vm_hold_free_pages(bp, newbsize); } else if (newbsize > bp->b_bufsize) { /* * We only use malloced memory on the first allocation. * and revert to page-allocated memory when the buffer * grows. */ /* * There is a potential smp race here that could lead * to bufmallocspace slightly passing the max. It * is probably extremely rare and not worth worrying * over. */ if ((bufmallocspace < maxbufmallocspace) && (bp->b_bufsize == 0) && (mbsize <= PAGE_SIZE/2)) { bp->b_data = malloc(mbsize, M_BIOBUF, M_WAITOK); bp->b_bcount = size; bp->b_flags |= B_MALLOC; bufmallocadjust(bp, mbsize); return 1; } origbuf = NULL; origbufsize = 0; /* * If the buffer is growing on its other-than-first * allocation then we revert to the page-allocation * scheme. */ if (bp->b_flags & B_MALLOC) { origbuf = bp->b_data; origbufsize = bp->b_bufsize; bp->b_data = bp->b_kvabase; bufmallocadjust(bp, 0); bp->b_flags &= ~B_MALLOC; newbsize = round_page(newbsize); } vm_hold_load_pages( bp, (vm_offset_t) bp->b_data + bp->b_bufsize, (vm_offset_t) bp->b_data + newbsize); if (origbuf) { bcopy(origbuf, bp->b_data, origbufsize); free(origbuf, M_BIOBUF); } } } else { int desiredpages; newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); desiredpages = (size == 0) ? 0 : num_pages((bp->b_offset & PAGE_MASK) + newbsize); if (bp->b_flags & B_MALLOC) panic("allocbuf: VMIO buffer can't be malloced"); /* * Set B_CACHE initially if buffer is 0 length or will become * 0-length. */ if (size == 0 || bp->b_bufsize == 0) bp->b_flags |= B_CACHE; if (newbsize < bp->b_bufsize) { /* * DEV_BSIZE aligned new buffer size is less then the * DEV_BSIZE aligned existing buffer size. Figure out * if we have to remove any pages. */ if (desiredpages < bp->b_npages) { vm_page_t m; if (buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); pmap_qremove((vm_offset_t)trunc_page( (vm_offset_t)bp->b_data) + (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); } else BUF_CHECK_UNMAPPED(bp); VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); for (i = desiredpages; i < bp->b_npages; i++) { /* * the page is not freed here -- it * is the responsibility of * vnode_pager_setsize */ m = bp->b_pages[i]; KASSERT(m != bogus_page, ("allocbuf: bogus page found")); while (vm_page_sleep_if_busy(m, "biodep")) continue; bp->b_pages[i] = NULL; vm_page_lock(m); vm_page_unwire(m, PQ_INACTIVE); vm_page_unlock(m); } VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); bp->b_npages = desiredpages; } } else if (size > bp->b_bcount) { /* * We are growing the buffer, possibly in a * byte-granular fashion. */ vm_object_t obj; vm_offset_t toff; vm_offset_t tinc; /* * Step 1, bring in the VM pages from the object, * allocating them if necessary. We must clear * B_CACHE if these pages are not valid for the * range covered by the buffer. */ obj = bp->b_bufobj->bo_object; VM_OBJECT_WLOCK(obj); while (bp->b_npages < desiredpages) { vm_page_t m; /* * We must allocate system pages since blocking * here could interfere with paging I/O, no * matter which process we are. * * Only exclusive busy can be tested here. 
* Blocking on shared busy might lead to * deadlocks once allocbuf() is called after * pages are vfs_busy_pages(). */ m = vm_page_grab(obj, OFF_TO_IDX(bp->b_offset) + bp->b_npages, VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY | VM_ALLOC_COUNT(desiredpages - bp->b_npages)); if (m->valid == 0) bp->b_flags &= ~B_CACHE; bp->b_pages[bp->b_npages] = m; ++bp->b_npages; } /* * Step 2. We've loaded the pages into the buffer, * we have to figure out if we can still have B_CACHE * set. Note that B_CACHE is set according to the * byte-granular range ( bcount and size ), new the * aligned range ( newbsize ). * * The VM test is against m->valid, which is DEV_BSIZE * aligned. Needless to say, the validity of the data * needs to also be DEV_BSIZE aligned. Note that this * fails with NFS if the server or some other client * extends the file's EOF. If our buffer is resized, * B_CACHE may remain set! XXX */ toff = bp->b_bcount; tinc = PAGE_SIZE - ((bp->b_offset + toff) & PAGE_MASK); while ((bp->b_flags & B_CACHE) && toff < size) { vm_pindex_t pi; if (tinc > (size - toff)) tinc = size - toff; pi = ((bp->b_offset & PAGE_MASK) + toff) >> PAGE_SHIFT; vfs_buf_test_cache( bp, bp->b_offset, toff, tinc, bp->b_pages[pi] ); toff += tinc; tinc = PAGE_SIZE; } VM_OBJECT_WUNLOCK(obj); /* * Step 3, fixup the KVA pmap. */ if (buf_mapped(bp)) bpmap_qenter(bp); else BUF_CHECK_UNMAPPED(bp); } } /* Record changes in allocation size. */ if (bp->b_bufsize != newbsize) bufspaceadjust(bp, newbsize); bp->b_bcount = size; /* requested buffer size. */ return 1; } extern int inflight_transient_maps; void biodone(struct bio *bp) { struct mtx *mtxp; void (*done)(struct bio *); vm_offset_t start, end; if ((bp->bio_flags & BIO_TRANSIENT_MAPPING) != 0) { bp->bio_flags &= ~BIO_TRANSIENT_MAPPING; bp->bio_flags |= BIO_UNMAPPED; start = trunc_page((vm_offset_t)bp->bio_data); end = round_page((vm_offset_t)bp->bio_data + bp->bio_length); bp->bio_data = unmapped_buf; pmap_qremove(start, OFF_TO_IDX(end - start)); vmem_free(transient_arena, start, end - start); atomic_add_int(&inflight_transient_maps, -1); } done = bp->bio_done; if (done == NULL) { mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); bp->bio_flags |= BIO_DONE; wakeup(bp); mtx_unlock(mtxp); } else { bp->bio_flags |= BIO_DONE; done(bp); } } /* * Wait for a BIO to finish. */ int biowait(struct bio *bp, const char *wchan) { struct mtx *mtxp; mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); while ((bp->bio_flags & BIO_DONE) == 0) msleep(bp, mtxp, PRIBIO, wchan, 0); mtx_unlock(mtxp); if (bp->bio_error != 0) return (bp->bio_error); if (!(bp->bio_flags & BIO_ERROR)) return (0); return (EIO); } void biofinish(struct bio *bp, struct devstat *stat, int error) { if (error) { bp->bio_error = error; bp->bio_flags |= BIO_ERROR; } if (stat != NULL) devstat_end_transaction_bio(stat, bp); biodone(bp); } /* * bufwait: * * Wait for buffer I/O completion, returning error status. The buffer * is left locked and B_DONE on return. B_EINTR is converted into an EINTR * error and cleared. */ int bufwait(struct buf *bp) { if (bp->b_iocmd == BIO_READ) bwait(bp, PRIBIO, "biord"); else bwait(bp, PRIBIO, "biowr"); if (bp->b_flags & B_EINTR) { bp->b_flags &= ~B_EINTR; return (EINTR); } if (bp->b_ioflags & BIO_ERROR) { return (bp->b_error ? bp->b_error : EIO); } else { return (0); } } /* * bufdone: * * Finish I/O on a buffer, optionally calling a completion function. * This is usually called from an interrupt so process blocking is * not allowed. 
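 * (In a typical configuration the chain is biodone() -> bio_done
 * callback -> bufdone(); see g_vfs_done() for the GEOM side of this.)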
* * biodone is also responsible for setting B_CACHE in a B_VMIO bp. * In a non-VMIO bp, B_CACHE will be set on the next getblk() * assuming B_INVAL is clear. * * For the VMIO case, we set B_CACHE if the op was a read and no * read error occured, or if the op was a write. B_CACHE is never * set if the buffer is invalid or otherwise uncacheable. * * biodone does not mess with B_INVAL, allowing the I/O routine or the * initiator to leave B_INVAL set to brelse the buffer out of existance * in the biodone routine. */ void bufdone(struct buf *bp) { struct bufobj *dropobj; void (*biodone)(struct buf *); CTR3(KTR_BUF, "bufdone(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); dropobj = NULL; KASSERT(!(bp->b_flags & B_DONE), ("biodone: bp %p already done", bp)); BUF_ASSERT_HELD(bp); runningbufwakeup(bp); if (bp->b_iocmd == BIO_WRITE) dropobj = bp->b_bufobj; /* call optional completion function if requested */ if (bp->b_iodone != NULL) { biodone = bp->b_iodone; bp->b_iodone = NULL; (*biodone) (bp); if (dropobj) bufobj_wdrop(dropobj); return; } bufdone_finish(bp); if (dropobj) bufobj_wdrop(dropobj); } void bufdone_finish(struct buf *bp) { BUF_ASSERT_HELD(bp); if (!LIST_EMPTY(&bp->b_dep)) buf_complete(bp); if (bp->b_flags & B_VMIO) { vm_ooffset_t foff; vm_page_t m; vm_object_t obj; struct vnode *vp; int bogus, i, iosize; obj = bp->b_bufobj->bo_object; KASSERT(obj->paging_in_progress >= bp->b_npages, ("biodone_finish: paging in progress(%d) < b_npages(%d)", obj->paging_in_progress, bp->b_npages)); vp = bp->b_vp; KASSERT(vp->v_holdcnt > 0, ("biodone_finish: vnode %p has zero hold count", vp)); KASSERT(vp->v_object != NULL, ("biodone_finish: vnode %p has no vm_object", vp)); foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("biodone_finish: bp %p has no buffer offset", bp)); /* * Set B_CACHE if the op was a normal read and no error * occured. B_CACHE is set for writes in the b*write() * routines. */ iosize = bp->b_bcount - bp->b_resid; if (bp->b_iocmd == BIO_READ && !(bp->b_flags & (B_INVAL|B_NOCACHE)) && !(bp->b_ioflags & BIO_ERROR)) { bp->b_flags |= B_CACHE; } bogus = 0; VM_OBJECT_WLOCK(obj); for (i = 0; i < bp->b_npages; i++) { int bogusflag = 0; int resid; resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff; if (resid > iosize) resid = iosize; /* * cleanup bogus pages, restoring the originals */ m = bp->b_pages[i]; if (m == bogus_page) { bogus = bogusflag = 1; m = vm_page_lookup(obj, OFF_TO_IDX(foff)); if (m == NULL) panic("biodone: page disappeared!"); bp->b_pages[i] = m; } KASSERT(OFF_TO_IDX(foff) == m->pindex, ("biodone_finish: foff(%jd)/pindex(%ju) mismatch", (intmax_t)foff, (uintmax_t)m->pindex)); /* * In the write case, the valid and clean bits are * already changed correctly ( see bdwrite() ), so we * only need to do this here in the read case. */ if ((bp->b_iocmd == BIO_READ) && !bogusflag && resid > 0) { KASSERT((m->dirty & vm_page_bits(foff & PAGE_MASK, resid)) == 0, ("bufdone_finish:" " page %p has unexpected dirty bits", m)); vfs_page_set_valid(bp, foff, m); } vm_page_sunbusy(m); vm_object_pip_subtract(obj, 1); foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; iosize -= resid; } vm_object_pip_wakeupn(obj, 0); VM_OBJECT_WUNLOCK(obj); if (bogus && buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } } /* * For asynchronous completions, release the buffer now. The brelse * will do a wakeup there if necessary - so no need to do a wakeup * here in the async case. The sync case always needs to do a wakeup. 
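 * (For the sync case, bdone() below sets B_DONE and wakes any thread
 * sleeping in bwait()/bufwait().)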
*/ if (bp->b_flags & B_ASYNC) { if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_RELBUF)) || (bp->b_ioflags & BIO_ERROR)) brelse(bp); else bqrelse(bp); } else bdone(bp); } /* * This routine is called in lieu of iodone in the case of * incomplete I/O. This keeps the busy status for pages * consistant. */ void vfs_unbusy_pages(struct buf *bp) { int i; vm_object_t obj; vm_page_t m; runningbufwakeup(bp); if (!(bp->b_flags & B_VMIO)) return; obj = bp->b_bufobj->bo_object; VM_OBJECT_WLOCK(obj); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; if (m == bogus_page) { m = vm_page_lookup(obj, OFF_TO_IDX(bp->b_offset) + i); if (!m) panic("vfs_unbusy_pages: page missing\n"); bp->b_pages[i] = m; if (buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } else BUF_CHECK_UNMAPPED(bp); } vm_object_pip_subtract(obj, 1); vm_page_sunbusy(m); } vm_object_pip_wakeupn(obj, 0); VM_OBJECT_WUNLOCK(obj); } /* * vfs_page_set_valid: * * Set the valid bits in a page based on the supplied offset. The * range is restricted to the buffer's size. * * This routine is typically called after a read completes. */ static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m) { vm_ooffset_t eoff; /* * Compute the end offset, eoff, such that [off, eoff) does not span a * page boundary and eoff is not greater than the end of the buffer. * The end of the buffer, in this case, is our file EOF, not the * allocation size of the buffer. */ eoff = (off + PAGE_SIZE) & ~(vm_ooffset_t)PAGE_MASK; if (eoff > bp->b_offset + bp->b_bcount) eoff = bp->b_offset + bp->b_bcount; /* * Set valid range. This is typically the entire buffer and thus the * entire page. */ if (eoff > off) vm_page_set_valid_range(m, off & PAGE_MASK, eoff - off); } /* * vfs_page_set_validclean: * * Set the valid bits and clear the dirty bits in a page based on the * supplied offset. The range is restricted to the buffer's size. */ static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, vm_page_t m) { vm_ooffset_t soff, eoff; /* * Start and end offsets in buffer. eoff - soff may not cross a * page boundry or cross the end of the buffer. The end of the * buffer, in this case, is our file EOF, not the allocation size * of the buffer. */ soff = off; eoff = (off + PAGE_SIZE) & ~(off_t)PAGE_MASK; if (eoff > bp->b_offset + bp->b_bcount) eoff = bp->b_offset + bp->b_bcount; /* * Set valid range. This is typically the entire buffer and thus the * entire page. */ if (eoff > soff) { vm_page_set_validclean( m, (vm_offset_t) (soff & PAGE_MASK), (vm_offset_t) (eoff - soff) ); } } /* * Ensure that all buffer pages are not exclusive busied. If any page is * exclusive busy, drain it. */ void vfs_drain_busy_pages(struct buf *bp) { vm_page_t m; int i, last_busied; VM_OBJECT_ASSERT_WLOCKED(bp->b_bufobj->bo_object); last_busied = 0; for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; if (vm_page_xbusied(m)) { for (; last_busied < i; last_busied++) vm_page_sbusy(bp->b_pages[last_busied]); while (vm_page_xbusied(m)) { vm_page_lock(m); VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); vm_page_busy_sleep(m, "vbpage"); VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); } } } for (i = 0; i < last_busied; i++) vm_page_sunbusy(bp->b_pages[i]); } /* * This routine is called before a device strategy routine. * It is used to tell the VM system that paging I/O is in * progress, and treat the pages associated with the buffer * almost as being exclusive busy. 
Also the object paging_in_progress * flag is handled to make sure that the object doesn't become * inconsistant. * * Since I/O has not been initiated yet, certain buffer flags * such as BIO_ERROR or B_INVAL may be in an inconsistant state * and should be ignored. */ void vfs_busy_pages(struct buf *bp, int clear_modify) { int i, bogus; vm_object_t obj; vm_ooffset_t foff; vm_page_t m; if (!(bp->b_flags & B_VMIO)) return; obj = bp->b_bufobj->bo_object; foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("vfs_busy_pages: no buffer offset")); VM_OBJECT_WLOCK(obj); vfs_drain_busy_pages(bp); if (bp->b_bufsize != 0) vfs_setdirty_locked_object(bp); bogus = 0; for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; if ((bp->b_flags & B_CLUSTER) == 0) { vm_object_pip_add(obj, 1); vm_page_sbusy(m); } /* * When readying a buffer for a read ( i.e * clear_modify == 0 ), it is important to do * bogus_page replacement for valid pages in * partially instantiated buffers. Partially * instantiated buffers can, in turn, occur when * reconstituting a buffer from its VM backing store * base. We only have to do this if B_CACHE is * clear ( which causes the I/O to occur in the * first place ). The replacement prevents the read * I/O from overwriting potentially dirty VM-backed * pages. XXX bogus page replacement is, uh, bogus. * It may not work properly with small-block devices. * We need to find a better way. */ if (clear_modify) { pmap_remove_write(m); vfs_page_set_validclean(bp, foff, m); } else if (m->valid == VM_PAGE_BITS_ALL && (bp->b_flags & B_CACHE) == 0) { bp->b_pages[i] = bogus_page; bogus++; } foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; } VM_OBJECT_WUNLOCK(obj); if (bogus && buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } } /* * vfs_bio_set_valid: * * Set the range within the buffer to valid. The range is * relative to the beginning of the buffer, b_offset. Note that * b_offset itself may be offset from the beginning of the first * page. */ void vfs_bio_set_valid(struct buf *bp, int base, int size) { int i, n; vm_page_t m; if (!(bp->b_flags & B_VMIO)) return; /* * Fixup base to be relative to beginning of first page. * Set initial n to be the maximum number of bytes in the * first page that can be validated. */ base += (bp->b_offset & PAGE_MASK); n = PAGE_SIZE - (base & PAGE_MASK); VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) { m = bp->b_pages[i]; if (n > size) n = size; vm_page_set_valid_range(m, base & PAGE_MASK, n); base += n; size -= n; n = PAGE_SIZE; } VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); } /* * vfs_bio_clrbuf: * * If the specified buffer is a non-VMIO buffer, clear the entire * buffer. If the specified buffer is a VMIO buffer, clear and * validate only the previously invalid portions of the buffer. * This routine essentially fakes an I/O, so we need to clear * BIO_ERROR and B_INVAL. * * Note that while we only theoretically need to clear through b_bcount, * we go ahead and clear through b_bufsize. 
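 *
 * For example, with a 512 byte DEV_BSIZE a 2048 byte single-page
 * buffer covers four valid bits, so the fast path below compares the
 * page's valid bits against mask == 0x0f.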
*/ void vfs_bio_clrbuf(struct buf *bp) { int i, j, mask, sa, ea, slide; if ((bp->b_flags & (B_VMIO | B_MALLOC)) != B_VMIO) { clrbuf(bp); return; } bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); if ((bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) && (bp->b_offset & PAGE_MASK) == 0) { if (bp->b_pages[0] == bogus_page) goto unlock; mask = (1 << (bp->b_bufsize / DEV_BSIZE)) - 1; VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[0]->object); if ((bp->b_pages[0]->valid & mask) == mask) goto unlock; if ((bp->b_pages[0]->valid & mask) == 0) { pmap_zero_page_area(bp->b_pages[0], 0, bp->b_bufsize); bp->b_pages[0]->valid |= mask; goto unlock; } } sa = bp->b_offset & PAGE_MASK; slide = 0; for (i = 0; i < bp->b_npages; i++, sa = 0) { slide = imin(slide + PAGE_SIZE, bp->b_offset + bp->b_bufsize); ea = slide & PAGE_MASK; if (ea == 0) ea = PAGE_SIZE; if (bp->b_pages[i] == bogus_page) continue; j = sa / DEV_BSIZE; mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j; VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[i]->object); if ((bp->b_pages[i]->valid & mask) == mask) continue; if ((bp->b_pages[i]->valid & mask) == 0) pmap_zero_page_area(bp->b_pages[i], sa, ea - sa); else { for (; sa < ea; sa += DEV_BSIZE, j++) { if ((bp->b_pages[i]->valid & (1 << j)) == 0) { pmap_zero_page_area(bp->b_pages[i], sa, DEV_BSIZE); } } } bp->b_pages[i]->valid |= mask; } unlock: VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); bp->b_resid = 0; } void vfs_bio_bzero_buf(struct buf *bp, int base, int size) { vm_page_t m; int i, n; if (buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); bzero(bp->b_data + base, size); } else { BUF_CHECK_UNMAPPED(bp); n = PAGE_SIZE - (base & PAGE_MASK); for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) { m = bp->b_pages[i]; if (n > size) n = size; pmap_zero_page_area(m, base & PAGE_MASK, n); base += n; size -= n; n = PAGE_SIZE; } } } /* * vm_hold_load_pages and vm_hold_free_pages get pages into * a buffers address space. The pages are anonymous and are * not associated with a file object. */ static void vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index; BUF_CHECK_MAPPED(bp); to = round_page(to); from = round_page(from); index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; for (pg = from; pg < to; pg += PAGE_SIZE, index++) { tryagain: /* * note: must allocate system pages since blocking here * could interfere with paging I/O, no matter which * process we are. */ p = vm_page_alloc(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_COUNT((to - pg) >> PAGE_SHIFT)); if (p == NULL) { VM_WAIT; goto tryagain; } pmap_qenter(pg, &p, 1); bp->b_pages[index] = p; } bp->b_npages = index; } /* Return pages associated with this buf to the vm system */ static void vm_hold_free_pages(struct buf *bp, int newbsize) { vm_offset_t from; vm_page_t p; int index, newnpages; BUF_CHECK_MAPPED(bp); from = round_page((vm_offset_t)bp->b_data + newbsize); newnpages = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; if (bp->b_npages > newnpages) pmap_qremove(from, bp->b_npages - newnpages); for (index = newnpages; index < bp->b_npages; index++) { p = bp->b_pages[index]; bp->b_pages[index] = NULL; if (vm_page_sbusied(p)) printf("vm_hold_free_pages: blkno: %jd, lblkno: %jd\n", (intmax_t)bp->b_blkno, (intmax_t)bp->b_lblkno); p->wire_count--; vm_page_free(p); atomic_subtract_int(&vm_cnt.v_wire_count, 1); } bp->b_npages = newnpages; } /* * Map an IO request into kernel virtual address space. 
* * All requests are (re)mapped into kernel VA space. * Notice that we use b_bufsize for the size of the buffer * to be mapped. b_bcount might be modified by the driver. * * Note that even if the caller determines that the address space should * be valid, a race or a smaller-file mapped into a larger space may * actually cause vmapbuf() to fail, so all callers of vmapbuf() MUST * check the return value. * * This function only works with pager buffers. */ int vmapbuf(struct buf *bp, int mapbuf) { vm_prot_t prot; int pidx; if (bp->b_bufsize < 0) return (-1); prot = VM_PROT_READ; if (bp->b_iocmd == BIO_READ) prot |= VM_PROT_WRITE; /* Less backwards than it looks */ if ((pidx = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, (vm_offset_t)bp->b_data, bp->b_bufsize, prot, bp->b_pages, btoc(MAXPHYS))) < 0) return (-1); bp->b_npages = pidx; bp->b_offset = ((vm_offset_t)bp->b_data) & PAGE_MASK; if (mapbuf || !unmapped_buf_allowed) { pmap_qenter((vm_offset_t)bp->b_kvabase, bp->b_pages, pidx); bp->b_data = bp->b_kvabase + bp->b_offset; } else bp->b_data = unmapped_buf; return(0); } /* * Free the io map PTEs associated with this IO operation. * We also invalidate the TLB entries and restore the original b_addr. * * This function only works with pager buffers. */ void vunmapbuf(struct buf *bp) { int npages; npages = bp->b_npages; if (buf_mapped(bp)) pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages); vm_page_unhold_pages(bp->b_pages, npages); bp->b_data = unmapped_buf; } void bdone(struct buf *bp) { struct mtx *mtxp; mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); bp->b_flags |= B_DONE; wakeup(bp); mtx_unlock(mtxp); } void bwait(struct buf *bp, u_char pri, const char *wchan) { struct mtx *mtxp; mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); while ((bp->b_flags & B_DONE) == 0) msleep(bp, mtxp, pri, wchan, 0); mtx_unlock(mtxp); } int bufsync(struct bufobj *bo, int waitfor) { return (VOP_FSYNC(bo->__bo_vnode, waitfor, curthread)); } void bufstrategy(struct bufobj *bo, struct buf *bp) { int i = 0; struct vnode *vp; vp = bp->b_vp; KASSERT(vp == bo->bo_private, ("Inconsistent vnode bufstrategy")); KASSERT(vp->v_type != VCHR && vp->v_type != VBLK, ("Wrong vnode in bufstrategy(bp=%p, vp=%p)", bp, vp)); i = VOP_STRATEGY(vp, bp); KASSERT(i == 0, ("VOP_STRATEGY failed bp=%p vp=%p", bp, bp->b_vp)); } void bufobj_wrefl(struct bufobj *bo) { KASSERT(bo != NULL, ("NULL bo in bufobj_wref")); ASSERT_BO_WLOCKED(bo); bo->bo_numoutput++; } void bufobj_wref(struct bufobj *bo) { KASSERT(bo != NULL, ("NULL bo in bufobj_wref")); BO_LOCK(bo); bo->bo_numoutput++; BO_UNLOCK(bo); } void bufobj_wdrop(struct bufobj *bo) { KASSERT(bo != NULL, ("NULL bo in bufobj_wdrop")); BO_LOCK(bo); KASSERT(bo->bo_numoutput > 0, ("bufobj_wdrop non-positive count")); if ((--bo->bo_numoutput == 0) && (bo->bo_flag & BO_WWAIT)) { bo->bo_flag &= ~BO_WWAIT; wakeup(&bo->bo_numoutput); } BO_UNLOCK(bo); } int bufobj_wwait(struct bufobj *bo, int slpflag, int timeo) { int error; KASSERT(bo != NULL, ("NULL bo in bufobj_wwait")); ASSERT_BO_WLOCKED(bo); error = 0; while (bo->bo_numoutput) { bo->bo_flag |= BO_WWAIT; error = msleep(&bo->bo_numoutput, BO_LOCKPTR(bo), slpflag | (PRIBIO + 1), "bo_wwait", timeo); if (error) break; } return (error); } void bpin(struct buf *bp) { struct mtx *mtxp; mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); bp->b_pin_count++; mtx_unlock(mtxp); } void bunpin(struct buf *bp) { struct mtx *mtxp; mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); if (--bp->b_pin_count == 0) wakeup(bp); 
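	/* The wakeup above unblocks threads sleeping in bunpin_wait(). */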
mtx_unlock(mtxp); } void bunpin_wait(struct buf *bp) { struct mtx *mtxp; mtxp = mtx_pool_find(mtxpool_sleep, bp); mtx_lock(mtxp); while (bp->b_pin_count > 0) msleep(bp, mtxp, PRIBIO, "bwunpin", 0); mtx_unlock(mtxp); } /* * Set bio_data or bio_ma for struct bio from the struct buf. */ void bdata2bio(struct buf *bp, struct bio *bip) { if (!buf_mapped(bp)) { KASSERT(unmapped_buf_allowed, ("unmapped")); bip->bio_ma = bp->b_pages; bip->bio_ma_n = bp->b_npages; bip->bio_data = unmapped_buf; bip->bio_ma_offset = (vm_offset_t)bp->b_offset & PAGE_MASK; bip->bio_flags |= BIO_UNMAPPED; KASSERT(round_page(bip->bio_ma_offset + bip->bio_length) / PAGE_SIZE == bp->b_npages, ("Buffer %p too short: %d %lld %d", bp, bip->bio_ma_offset, (long long)bip->bio_length, bip->bio_ma_n)); } else { bip->bio_data = bp->b_data; bip->bio_ma = NULL; } } #include "opt_ddb.h" #ifdef DDB #include /* DDB command to show buffer data */ DB_SHOW_COMMAND(buffer, db_show_buffer) { /* get args */ struct buf *bp = (struct buf *)addr; if (!have_addr) { db_printf("usage: show buffer \n"); return; } db_printf("buf at %p\n", bp); db_printf("b_flags = 0x%b, b_xflags=0x%b, b_vflags=0x%b\n", (u_int)bp->b_flags, PRINT_BUF_FLAGS, (u_int)bp->b_xflags, PRINT_BUF_XFLAGS, (u_int)bp->b_vflags, PRINT_BUF_VFLAGS); db_printf( "b_error = %d, b_bufsize = %ld, b_bcount = %ld, b_resid = %ld\n" "b_bufobj = (%p), b_data = %p, b_blkno = %jd, b_lblkno = %jd, " "b_dep = %p\n", bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid, bp->b_bufobj, bp->b_data, (intmax_t)bp->b_blkno, (intmax_t)bp->b_lblkno, bp->b_dep.lh_first); db_printf("b_kvabase = %p, b_kvasize = %d\n", bp->b_kvabase, bp->b_kvasize); if (bp->b_npages) { int i; db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages); for (i = 0; i < bp->b_npages; i++) { vm_page_t m; m = bp->b_pages[i]; db_printf("(%p, 0x%lx, 0x%lx)", (void *)m->object, (u_long)m->pindex, (u_long)VM_PAGE_TO_PHYS(m)); if ((i + 1) < bp->b_npages) db_printf(","); } db_printf("\n"); } db_printf(" "); BUF_LOCKPRINTINFO(bp); } DB_SHOW_COMMAND(lockedbufs, lockedbufs) { struct buf *bp; int i; for (i = 0; i < nbuf; i++) { bp = &buf[i]; if (BUF_ISLOCKED(bp)) { db_show_buffer((uintptr_t)bp, 1, 0, NULL); db_printf("\n"); } } } DB_SHOW_COMMAND(vnodebufs, db_show_vnodebufs) { struct vnode *vp; struct buf *bp; if (!have_addr) { db_printf("usage: show vnodebufs \n"); return; } vp = (struct vnode *)addr; db_printf("Clean buffers:\n"); TAILQ_FOREACH(bp, &vp->v_bufobj.bo_clean.bv_hd, b_bobufs) { db_show_buffer((uintptr_t)bp, 1, 0, NULL); db_printf("\n"); } db_printf("Dirty buffers:\n"); TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) { db_show_buffer((uintptr_t)bp, 1, 0, NULL); db_printf("\n"); } } DB_COMMAND(countfreebufs, db_coundfreebufs) { struct buf *bp; int i, used = 0, nfree = 0; if (have_addr) { db_printf("usage: countfreebufs\n"); return; } for (i = 0; i < nbuf; i++) { bp = &buf[i]; if ((bp->b_flags & B_INFREECNT) != 0) nfree++; else used++; } db_printf("Counted %d free, %d used (%d tot)\n", nfree, used, nfree + used); db_printf("numfreebuffers is %d\n", numfreebuffers); } #endif /* DDB */ Index: projects/clang370-import/sys/netinet/toecore.c =================================================================== --- projects/clang370-import/sys/netinet/toecore.c (revision 288125) +++ projects/clang370-import/sys/netinet/toecore.c (revision 288126) @@ -1,655 +1,584 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. 
* Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include static struct mtx toedev_lock; static TAILQ_HEAD(, toedev) toedev_list; static eventhandler_tag listen_start_eh; static eventhandler_tag listen_stop_eh; static eventhandler_tag lle_event_eh; static eventhandler_tag route_redirect_eh; static int toedev_connect(struct toedev *tod __unused, struct socket *so __unused, struct rtentry *rt __unused, struct sockaddr *nam __unused) { return (ENOTSUP); } static int toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused) { return (ENOTSUP); } static int toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused) { return (ENOTSUP); } static void toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused, struct mbuf *m) { m_freem(m); return; } static void toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused) { return; } static int toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused) { return (ENOTSUP); } static void toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused) { return; } static void toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused, struct sockaddr *sa __unused, uint8_t *lladdr __unused, uint16_t vtag __unused) { return; } static void toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused, struct rtentry *rt0 __unused, struct rtentry *rt1 __unused) { return; } static void toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused) { return; } static void toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused) { return; } static int toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused, struct mbuf *m) { m_freem(m); return (0); } static void toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused, struct socket *so __unused) { return; } static void toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused, 
int sopt_dir __unused, int sopt_name __unused) { return; } /* * Inform one or more TOE devices about a listening socket. */ static void toe_listen_start(struct inpcb *inp, void *arg) { struct toedev *t, *tod; struct tcpcb *tp; INP_WLOCK_ASSERT(inp); KASSERT(inp->inp_pcbinfo == &V_tcbinfo, ("%s: inp is not a TCP inp", __func__)); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) return; tp = intotcpcb(inp); if (tp->t_state != TCPS_LISTEN) return; t = arg; mtx_lock(&toedev_lock); TAILQ_FOREACH(tod, &toedev_list, link) { if (t == NULL || t == tod) tod->tod_listen_start(tod, tp); } mtx_unlock(&toedev_lock); } static void toe_listen_start_event(void *arg __unused, struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_LISTEN, ("%s: t_state %s", __func__, tcpstates[tp->t_state])); toe_listen_start(inp, NULL); } static void toe_listen_stop_event(void *arg __unused, struct tcpcb *tp) { struct toedev *tod; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_LISTEN, ("%s: t_state %s", __func__, tcpstates[tp->t_state])); mtx_lock(&toedev_lock); TAILQ_FOREACH(tod, &toedev_list, link) tod->tod_listen_stop(tod, tp); mtx_unlock(&toedev_lock); } /* * Fill up a freshly allocated toedev struct with reasonable defaults. */ void init_toedev(struct toedev *tod) { tod->tod_softc = NULL; /* * Provide no-op defaults so that the kernel can call any toedev * function without having to check whether the TOE driver supplied one * or not. */ tod->tod_connect = toedev_connect; tod->tod_listen_start = toedev_listen_start; tod->tod_listen_stop = toedev_listen_stop; tod->tod_input = toedev_input; tod->tod_rcvd = toedev_rcvd; tod->tod_output = toedev_output; tod->tod_send_rst = toedev_output; tod->tod_send_fin = toedev_output; tod->tod_pcb_detach = toedev_pcb_detach; tod->tod_l2_update = toedev_l2_update; tod->tod_route_redirect = toedev_route_redirect; tod->tod_syncache_added = toedev_syncache_added; tod->tod_syncache_removed = toedev_syncache_removed; tod->tod_syncache_respond = toedev_syncache_respond; tod->tod_offload_socket = toedev_offload_socket; tod->tod_ctloutput = toedev_ctloutput; } /* * Register an active TOE device with the system. This allows it to receive * notifications from the kernel. */ int register_toedev(struct toedev *tod) { struct toedev *t; mtx_lock(&toedev_lock); TAILQ_FOREACH(t, &toedev_list, link) { if (t == tod) { mtx_unlock(&toedev_lock); return (EEXIST); } } TAILQ_INSERT_TAIL(&toedev_list, tod, link); registered_toedevs++; mtx_unlock(&toedev_lock); inp_apply_all(toe_listen_start, tod); return (0); } /* * Remove the TOE device from the global list of active TOE devices. It is the * caller's responsibility to ensure that the TOE device is quiesced prior to * this call. 
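 *
 * Illustrative sketch only (sc and its tod member are hypothetical, not
 * from this file): a driver's detach routine pairs this with its earlier
 * register_toedev() call and propagates the ENODEV error:
 *
 *	error = unregister_toedev(&sc->tod);
 *	if (error != 0)
 *		return (error);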
*/ int unregister_toedev(struct toedev *tod) { struct toedev *t, *t2; int rc = ENODEV; mtx_lock(&toedev_lock); TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) { if (t == tod) { TAILQ_REMOVE(&toedev_list, tod, link); registered_toedevs--; rc = 0; break; } } KASSERT(registered_toedevs >= 0, ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs)); mtx_unlock(&toedev_lock); return (rc); } void toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct inpcb *inp, void *tod, void *todctx) { struct socket *lso = inp->inp_socket; INP_WLOCK_ASSERT(inp); syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx); } int toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct socket **lsop) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); return (syncache_expand(inc, to, th, lsop, NULL)); } /* * General purpose check to see if a 4-tuple is in use by the kernel. If a TCP * header (presumably for an incoming SYN) is also provided, an existing 4-tuple * in TIME_WAIT may be assassinated freeing it up for re-use. * * Note that the TCP header must have been run through tcp_fields_to_host() or * equivalent. */ int toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp) { struct inpcb *inp; if (inc->inc_flags & INC_ISIPV6) { inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr, inc->inc_fport, &inc->inc6_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp); } else { inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport, inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp); } if (inp != NULL) { INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* for twcheck */ if (!tcp_twcheck(inp, NULL, th, NULL, 0)) return (EADDRINUSE); } else { INP_WUNLOCK(inp); return (EADDRINUSE); } } return (0); } static void toe_lle_event(void *arg __unused, struct llentry *lle, int evt) { struct toedev *tod; struct ifnet *ifp; struct sockaddr *sa; uint8_t *lladdr; uint16_t vtag; int family; struct sockaddr_in6 sin6; LLE_WLOCK_ASSERT(lle); ifp = lltable_get_ifp(lle->lle_tbl); family = lltable_get_af(lle->lle_tbl); if (family != AF_INET && family != AF_INET6) return; /* * Not interested if the interface's TOE capability is not enabled. */ if ((family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) || (family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))) return; tod = TOEDEV(ifp); if (tod == NULL) return; sa = (struct sockaddr *)&sin6; lltable_fill_sa_entry(lle, sa); vtag = 0xfff; if (evt != LLENTRY_RESOLVED) { /* * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean * this entry is going to be deleted. */ lladdr = NULL; } else { KASSERT(lle->la_flags & LLE_VALID, ("%s: %p resolved but not valid?", __func__, lle)); lladdr = (uint8_t *)&lle->ll_addr; #ifdef VLAN_TAG VLAN_TAG(ifp, &vtag); #endif } tod->tod_l2_update(tod, ifp, sa, lladdr, vtag); } /* * XXX: implement. */ static void toe_route_redirect_event(void *arg __unused, struct rtentry *rt0, struct rtentry *rt1, struct sockaddr *sa) { return; } -#ifdef INET6 /* - * XXX: no checks to verify that sa is really a neighbor because we assume it is - * the result of a route lookup and is on-link on the given ifp. 
- */ -static int -toe_nd6_resolve(struct ifnet *ifp, struct sockaddr *sa, uint8_t *lladdr) -{ - struct llentry *lle, *lle_tmp; - struct sockaddr_in6 *sin6 = (void *)sa; - int rc, flags = 0; - -restart: - IF_AFDATA_RLOCK(ifp); - lle = lla_lookup(LLTABLE6(ifp), flags, sa); - IF_AFDATA_RUNLOCK(ifp); - if (lle == NULL) { - lle = nd6_alloc(&sin6->sin6_addr, 0, ifp); - if (lle == NULL) - return (ENOMEM); /* Couldn't create entry in cache. */ - IF_AFDATA_WLOCK(ifp); - LLE_WLOCK(lle); - lle_tmp = nd6_lookup(&sin6->sin6_addr, ND6_EXCLUSIVE, ifp); - /* Prefer any existing lle over newly-created one */ - if (lle_tmp == NULL) - lltable_link_entry(LLTABLE6(ifp), lle); - IF_AFDATA_WUNLOCK(ifp); - if (lle_tmp == NULL) { - /* Arm timer for newly-created entry and send NS */ - nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE); - LLE_WUNLOCK(lle); - - nd6_ns_output(ifp, NULL, NULL, &sin6->sin6_addr, 0); - - return (EWOULDBLOCK); - } else { - /* Drop newly-created lle and switch to existing one */ - lltable_free_entry(LLTABLE6(ifp), lle); - lle = lle_tmp; - lle_tmp = NULL; - } - } - - if (lle->ln_state == ND6_LLINFO_STALE) { - if ((flags & LLE_EXCLUSIVE) == 0) { - LLE_RUNLOCK(lle); - flags |= LLE_EXCLUSIVE; - goto restart; - } - - LLE_WLOCK_ASSERT(lle); - - lle->la_asked = 0; - nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY); - } - - if (lle->la_flags & LLE_VALID) { - memcpy(lladdr, &lle->ll_addr, ifp->if_addrlen); - rc = 0; - } else - rc = EWOULDBLOCK; - - if (flags & LLE_EXCLUSIVE) - LLE_WUNLOCK(lle); - else - LLE_RUNLOCK(lle); - - return (rc); -} -#endif - -/* * Returns 0 or EWOULDBLOCK on success (any other value is an error). 0 means * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's * tod_l2_update will be called later, when the entry is resolved or times out. */ int toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, uint8_t *lladdr, uint16_t *vtag) { int rc; switch (sa->sa_family) { #ifdef INET case AF_INET: rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL); break; #endif #ifdef INET6 case AF_INET6: - rc = toe_nd6_resolve(ifp, sa, lladdr); + rc = nd6_resolve(ifp, 0, NULL, sa, lladdr, NULL); break; #endif default: return (EPROTONOSUPPORT); } if (rc == 0) { #ifdef VLAN_TAG if (VLAN_TAG(ifp, vtag) != 0) #endif *vtag = 0xfff; } return (rc); } void toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err) { INP_WLOCK_ASSERT(inp); if (!(inp->inp_flags & INP_DROPPED)) { struct tcpcb *tp = intotcpcb(inp); KASSERT(tp->t_flags & TF_TOE, ("%s: tp %p not offloaded.", __func__, tp)); if (err == EAGAIN) { /* * Temporary failure during offload, take this PCB back. * Detach from the TOE driver and do the rest of what * TCP's pru_connect would have done if the connection * wasn't offloaded. 
*/ tod->tod_pcb_detach(tod, tp); KASSERT(!(tp->t_flags & TF_TOE), ("%s: tp %p still offloaded.", __func__, tp)); tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); (void) tcp_output(tp); } else { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); tp = tcp_drop(tp, err); if (tp == NULL) INP_WLOCK(inp); /* re-acquire */ } } INP_WLOCK_ASSERT(inp); } static int toecore_load(void) { mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF); TAILQ_INIT(&toedev_list); listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start, toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY); listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop, toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY); lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL, EVENTHANDLER_PRI_ANY); route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event, toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY); return (0); } static int toecore_unload(void) { mtx_lock(&toedev_lock); if (!TAILQ_EMPTY(&toedev_list)) { mtx_unlock(&toedev_lock); return (EBUSY); } EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh); EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh); EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh); mtx_unlock(&toedev_lock); mtx_destroy(&toedev_lock); return (0); } static int toecore_mod_handler(module_t mod, int cmd, void *arg) { if (cmd == MOD_LOAD) return (toecore_load()); if (cmd == MOD_UNLOAD) return (toecore_unload()); return (EOPNOTSUPP); } static moduledata_t mod_data= { "toecore", toecore_mod_handler, 0 }; MODULE_VERSION(toecore, 1); DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY); Index: projects/clang370-import/sys/vm/vm_page.c =================================================================== --- projects/clang370-import/sys/vm/vm_page.c (revision 288125) +++ projects/clang370-import/sys/vm/vm_page.c (revision 288126) @@ -1,3337 +1,3341 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1998 Matthew Dillon. All Rights Reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 */ /*- * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * GENERAL RULES ON VM_PAGE MANIPULATION * * - A page queue lock is required when adding or removing a page from a * page queue regardless of other locks or the busy state of a page. * * * In general, no thread besides the page daemon can acquire or * hold more than one page queue lock at a time. * * * The page daemon can acquire and hold any pair of page queue * locks in any order. * * - The object lock is required when inserting or removing * pages from an object (vm_page_insert() or vm_page_remove()). * */ /* * Resident memory management module. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Associated with page of user-allocatable memory is a * page structure. 
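 * PHYS_TO_VM_PAGE() (defined below) performs the reverse mapping from a
 * physical address to its page structure, by direct vm_page_array
 * indexing under VM_PHYSSEG_DENSE or by a segment lookup under
 * VM_PHYSSEG_SPARSE.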
*/ struct vm_domain vm_dom[MAXMEMDOM]; struct mtx_padalign vm_page_queue_free_mtx; struct mtx_padalign pa_lock[PA_LOCK_COUNT]; vm_page_t vm_page_array; long vm_page_array_size; long first_page; int vm_page_zero_count; static int boot_pages = UMA_BOOT_PAGES; SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &boot_pages, 0, "number of pages allocated for bootstrapping the VM system"); static int pa_tryrelock_restart; SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD, &pa_tryrelock_restart, 0, "Number of tryrelock restarts"); static TAILQ_HEAD(, vm_page) blacklist_head; static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages"); static uma_zone_t fakepg_zone; static struct vnode *vm_page_alloc_init(vm_page_t m); static void vm_page_cache_turn_free(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(uint8_t queue, vm_page_t m); static void vm_page_init_fakepg(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL); static void vm_page_init_fakepg(void *dummy) { fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); } /* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */ #if PAGE_SIZE == 32768 #ifdef CTASSERT CTASSERT(sizeof(u_long) >= 8); #endif #endif /* * Try to acquire a physical address lock while a pmap is locked. If we * fail to trylock we unlock and lock the pmap directly and cache the * locked pa in *locked. The caller should then restart their loop in case * the virtual to physical mapping has changed. */ int vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked) { vm_paddr_t lockpa; lockpa = *locked; *locked = pa; if (lockpa) { PA_LOCK_ASSERT(lockpa, MA_OWNED); if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa)) return (0); PA_UNLOCK(lockpa); } if (PA_TRYLOCK(pa)) return (0); PMAP_UNLOCK(pmap); atomic_add_int(&pa_tryrelock_restart, 1); PA_LOCK(pa); PMAP_LOCK(pmap); return (EAGAIN); } /* * vm_set_page_size: * * Sets the page size, perhaps based upon the memory * size. Must be called before any use of page-size * dependent functions. */ void vm_set_page_size(void) { if (vm_cnt.v_page_size == 0) vm_cnt.v_page_size = PAGE_SIZE; if (((vm_cnt.v_page_size - 1) & vm_cnt.v_page_size) != 0) panic("vm_set_page_size: page size not a power of two"); } /* * vm_page_blacklist_next: * * Find the next entry in the provided string of blacklist * addresses. Entries are separated by space, comma, or newline. * If an invalid integer is encountered then the rest of the * string is skipped. Updates the list pointer to the next * character, or NULL if the string is exhausted or invalid. */ static vm_paddr_t vm_page_blacklist_next(char **list, char *end) { vm_paddr_t bad; char *cp, *pos; if (list == NULL || *list == NULL) return (0); if (**list =='\0') { *list = NULL; return (0); } /* * If there's no end pointer then the buffer is coming from * the kenv and we know it's null-terminated. 
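 * (A kenv-supplied list might look like "0x1f4000 0x2a8000"; entries may
 * be hexadecimal because strtoq() below is called with base 0, and may
 * be separated by spaces, commas, or newlines.)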
*/ if (end == NULL) end = *list + strlen(*list); /* Ensure that strtoq() won't walk off the end */ if (*end != '\0') { if (*end == '\n' || *end == ' ' || *end == ',') *end = '\0'; else { printf("Blacklist not terminated, skipping\n"); *list = NULL; return (0); } } for (pos = *list; *pos != '\0'; pos = cp) { bad = strtoq(pos, &cp, 0); if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') { if (bad == 0) { if (++cp < end) continue; else break; } } else break; if (*cp == '\0' || ++cp >= end) *list = NULL; else *list = cp; return (trunc_page(bad)); } printf("Garbage in RAM blacklist, skipping\n"); *list = NULL; return (0); } /* * vm_page_blacklist_check: * * Iterate through the provided string of blacklist addresses, pulling * each entry out of the physical allocator free list and putting it * onto a list for reporting via the vm.page_blacklist sysctl. */ static void vm_page_blacklist_check(char *list, char *end) { vm_paddr_t pa; vm_page_t m; char *next; int ret; next = list; while (next != NULL) { if ((pa = vm_page_blacklist_next(&next, end)) == 0) continue; m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) continue; mtx_lock(&vm_page_queue_free_mtx); ret = vm_phys_unfree_page(m); mtx_unlock(&vm_page_queue_free_mtx); if (ret == TRUE) { TAILQ_INSERT_TAIL(&blacklist_head, m, listq); if (bootverbose) printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa); } } } /* * vm_page_blacklist_load: * * Search for a special module named "ram_blacklist". It'll be a * plain text file provided by the user via the loader directive * of the same name. */ static void vm_page_blacklist_load(char **list, char **end) { void *mod; u_char *ptr; u_int len; mod = NULL; ptr = NULL; mod = preload_search_by_type("ram_blacklist"); if (mod != NULL) { ptr = preload_fetch_addr(mod); len = preload_fetch_size(mod); } *list = ptr; if (ptr != NULL) *end = ptr + len; else *end = NULL; return; } static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS) { vm_page_t m; struct sbuf sbuf; int error, first; first = 1; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); TAILQ_FOREACH(m, &blacklist_head, listq) { sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",", (uintmax_t)m->phys_addr); first = 0; } error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } static void vm_page_domain_init(struct vm_domain *vmd) { struct vm_pagequeue *pq; int i; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) = "vm inactive pagequeue"; *__DECONST(int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) = &vm_cnt.v_inactive_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) = "vm active pagequeue"; *__DECONST(int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) = &vm_cnt.v_active_count; vmd->vmd_page_count = 0; vmd->vmd_free_count = 0; vmd->vmd_segs = 0; vmd->vmd_oom = FALSE; vmd->vmd_pass = 0; for (i = 0; i < PQ_COUNT; i++) { pq = &vmd->vmd_pagequeues[i]; TAILQ_INIT(&pq->pq_pl); mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue", MTX_DEF | MTX_DUPOK); } } /* * vm_page_startup: * * Initializes the resident memory module. * * Allocates memory for the page cells, and * for the object/offset-to-page hash table headers. * Each page cell is initialized and placed on the free list. 
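 *
 * Returns the updated virtual address, i.e. the first address left
 * unused by the boot-time allocations performed here.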
*/ vm_offset_t vm_page_startup(vm_offset_t vaddr) { vm_offset_t mapped; vm_paddr_t page_range; vm_paddr_t new_end; int i; vm_paddr_t pa; vm_paddr_t last_pa; char *list, *listend; vm_paddr_t end; vm_paddr_t biggestsize; vm_paddr_t low_water, high_water; int biggestone; biggestsize = 0; biggestone = 0; vaddr = round_page(vaddr); for (i = 0; phys_avail[i + 1]; i += 2) { phys_avail[i] = round_page(phys_avail[i]); phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); } low_water = phys_avail[0]; high_water = phys_avail[1]; for (i = 0; i < vm_phys_nsegs; i++) { if (vm_phys_segs[i].start < low_water) low_water = vm_phys_segs[i].start; if (vm_phys_segs[i].end > high_water) high_water = vm_phys_segs[i].end; } for (i = 0; phys_avail[i + 1]; i += 2) { vm_paddr_t size = phys_avail[i + 1] - phys_avail[i]; if (size > biggestsize) { biggestone = i; biggestsize = size; } if (phys_avail[i] < low_water) low_water = phys_avail[i]; if (phys_avail[i + 1] > high_water) high_water = phys_avail[i + 1]; } end = phys_avail[biggestone+1]; /* * Initialize the page and queue locks. */ mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF); for (i = 0; i < PA_LOCK_COUNT; i++) mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF); for (i = 0; i < vm_ndomains; i++) vm_page_domain_init(&vm_dom[i]); /* * Allocate memory for use when boot strapping the kernel memory * allocator. * * CTFLAG_RDTUN doesn't work during the early boot process, so we must * manually fetch the value. */ TUNABLE_INT_FETCH("vm.boot_pages", &boot_pages); new_end = end - (boot_pages * UMA_SLAB_SIZE); new_end = trunc_page(new_end); mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)mapped, end - new_end); uma_startup((void *)mapped, boot_pages); #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ defined(__i386__) || defined(__mips__) /* * Allocate a bitmap to indicate that a random physical page * needs to be included in a minidump. * * The amd64 port needs this to indicate which direct map pages * need to be dumped, via calls to dump_add_page()/dump_drop_page(). * * However, i386 still needs this workspace internally within the * minidump code. In theory, they are not needed on i386, but are * included should the sf_buf code decide to use them. */ last_pa = 0; for (i = 0; dump_avail[i + 1] != 0; i += 2) if (dump_avail[i + 1] > last_pa) last_pa = dump_avail[i + 1]; page_range = last_pa / PAGE_SIZE; vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY); new_end -= vm_page_dump_size; vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end, new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)vm_page_dump, vm_page_dump_size); #endif #ifdef __amd64__ /* * Request that the physical pages underlying the message buffer be * included in a crash dump. Since the message buffer is accessed * through the direct map, they are not automatically included. */ pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr); last_pa = pa + round_page(msgbufsize); while (pa < last_pa) { dump_add_page(pa); pa += PAGE_SIZE; } #endif /* * Compute the number of pages of memory that will be available for * use (taking into account the overhead of a page structure per * page). 
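 * (Rough worked example, not tied to any one platform: with 4 KB pages
 * and a struct vm_page on the order of 100 bytes, the array consumes
 * roughly 100/4096, i.e. about 2.5%, of managed memory.)
 */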
*/ first_page = low_water / PAGE_SIZE; #ifdef VM_PHYSSEG_SPARSE page_range = 0; for (i = 0; i < vm_phys_nsegs; i++) { page_range += atop(vm_phys_segs[i].end - vm_phys_segs[i].start); } for (i = 0; phys_avail[i + 1] != 0; i += 2) page_range += atop(phys_avail[i + 1] - phys_avail[i]); #elif defined(VM_PHYSSEG_DENSE) page_range = high_water / PAGE_SIZE - first_page; #else #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." #endif end = new_end; /* * Reserve an unmapped guard page to trap access to vm_page_array[-1]. */ vaddr += PAGE_SIZE; /* * Initialize the mem entry structures now, and put them in the free * queue. */ new_end = trunc_page(end - page_range * sizeof(struct vm_page)); mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); vm_page_array = (vm_page_t) mapped; #if VM_NRESERVLEVEL > 0 /* * Allocate memory for the reservation management system's data * structures. */ new_end = vm_reserv_startup(&vaddr, new_end, high_water); #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) /* * pmap_map on arm64, amd64, and mips can come out of the direct-map, * not kvm like i386, so the pages must be tracked for a crashdump to * include this data. This includes the vm_page_array and the early * UMA bootstrap pages. */ for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE) dump_add_page(pa); #endif phys_avail[biggestone + 1] = new_end; /* * Add physical memory segments corresponding to the available * physical pages. */ for (i = 0; phys_avail[i + 1] != 0; i += 2) vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]); /* * Clear all of the page structures */ bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); for (i = 0; i < page_range; i++) vm_page_array[i].order = VM_NFREEORDER; vm_page_array_size = page_range; /* * Initialize the physical memory allocator. */ vm_phys_init(); /* * Add every available physical page that is not blacklisted to * the free lists. */ vm_cnt.v_page_count = 0; vm_cnt.v_free_count = 0; for (i = 0; phys_avail[i + 1] != 0; i += 2) { pa = phys_avail[i]; last_pa = phys_avail[i + 1]; while (pa < last_pa) { vm_phys_add_page(pa); pa += PAGE_SIZE; } } TAILQ_INIT(&blacklist_head); vm_page_blacklist_load(&list, &listend); vm_page_blacklist_check(list, listend); list = kern_getenv("vm.blacklist"); vm_page_blacklist_check(list, NULL); freeenv(list); #if VM_NRESERVLEVEL > 0 /* * Initialize the reservation management system. */ vm_reserv_init(); #endif return (vaddr); } void vm_page_reference(vm_page_t m) { vm_page_aflag_set(m, PGA_REFERENCED); } /* * vm_page_busy_downgrade: * * Downgrade an exclusive busy page into a single shared busy page. */ void vm_page_busy_downgrade(vm_page_t m) { u_int x; vm_page_assert_xbusied(m); for (;;) { x = m->busy_lock; x &= VPB_BIT_WAITERS; if (atomic_cmpset_rel_int(&m->busy_lock, VPB_SINGLE_EXCLUSIVER | x, VPB_SHARERS_WORD(1) | x)) break; } } /* * vm_page_sbusied: * * Return a positive value if the page is shared busied, 0 otherwise. */ int vm_page_sbusied(vm_page_t m) { u_int x; x = m->busy_lock; return ((x & VPB_BIT_SHARED) != 0 && x != VPB_UNBUSIED); } /* * vm_page_sunbusy: * * Shared unbusy a page. 
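 *
 * The last sharer is also responsible for waking any waiters; as the
 * code below shows, that final transition takes the page lock before
 * clearing the busy state, so the wakeup cannot be missed.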
*/ void vm_page_sunbusy(vm_page_t m) { u_int x; vm_page_assert_sbusied(m); for (;;) { x = m->busy_lock; if (VPB_SHARERS(x) > 1) { if (atomic_cmpset_int(&m->busy_lock, x, x - VPB_ONE_SHARER)) break; continue; } if ((x & VPB_BIT_WAITERS) == 0) { KASSERT(x == VPB_SHARERS_WORD(1), ("vm_page_sunbusy: invalid lock state")); if (atomic_cmpset_int(&m->busy_lock, VPB_SHARERS_WORD(1), VPB_UNBUSIED)) break; continue; } KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS), ("vm_page_sunbusy: invalid lock state for waiters")); vm_page_lock(m); if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) { vm_page_unlock(m); continue; } wakeup(m); vm_page_unlock(m); break; } } /* * vm_page_busy_sleep: * * Sleep and release the page lock, using the page pointer as wchan. * This is used to implement the hard-path of busying mechanism. * * The given page must be locked. */ void vm_page_busy_sleep(vm_page_t m, const char *wmesg) { u_int x; vm_page_lock_assert(m, MA_OWNED); x = m->busy_lock; if (x == VPB_UNBUSIED) { vm_page_unlock(m); return; } if ((x & VPB_BIT_WAITERS) == 0 && !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS)) { vm_page_unlock(m); return; } msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0); } /* * vm_page_trysbusy: * * Try to shared busy a page. * If the operation succeeds 1 is returned otherwise 0. * The operation never sleeps. */ int vm_page_trysbusy(vm_page_t m) { u_int x; for (;;) { x = m->busy_lock; if ((x & VPB_BIT_SHARED) == 0) return (0); if (atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_SHARER)) return (1); } } /* * vm_page_xunbusy_hard: * * Called after the first try the exclusive unbusy of a page failed. * It is assumed that the waiters bit is on. */ void vm_page_xunbusy_hard(vm_page_t m) { vm_page_assert_xbusied(m); vm_page_lock(m); atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); wakeup(m); vm_page_unlock(m); } /* * vm_page_flash: * * Wakeup anyone waiting for the page. * The ownership bits do not change. * * The given page must be locked. */ void vm_page_flash(vm_page_t m) { u_int x; vm_page_lock_assert(m, MA_OWNED); for (;;) { x = m->busy_lock; if ((x & VPB_BIT_WAITERS) == 0) return; if (atomic_cmpset_int(&m->busy_lock, x, x & (~VPB_BIT_WAITERS))) break; } wakeup(m); } /* * Keep page from being freed by the page daemon * much of the same effect as wiring, except much lower * overhead and should be used only for *very* temporary * holding ("wiring"). */ void vm_page_hold(vm_page_t mem) { vm_page_lock_assert(mem, MA_OWNED); mem->hold_count++; } void vm_page_unhold(vm_page_t mem) { vm_page_lock_assert(mem, MA_OWNED); KASSERT(mem->hold_count >= 1, ("vm_page_unhold: hold count < 0!!!")); --mem->hold_count; if (mem->hold_count == 0 && (mem->flags & PG_UNHOLDFREE) != 0) vm_page_free_toq(mem); } /* * vm_page_unhold_pages: * * Unhold each of the pages that is referenced by the given array. */ void vm_page_unhold_pages(vm_page_t *ma, int count) { struct mtx *mtx, *new_mtx; mtx = NULL; for (; count != 0; count--) { /* * Avoid releasing and reacquiring the same page lock. 
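 * ("mtx" caches the most recently taken page lock, so runs of pages
 * that hash to the same lock are handled without a drop/reacquire
 * cycle.)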
*/ new_mtx = vm_page_lockptr(*ma); if (mtx != new_mtx) { if (mtx != NULL) mtx_unlock(mtx); mtx = new_mtx; mtx_lock(mtx); } vm_page_unhold(*ma); ma++; } if (mtx != NULL) mtx_unlock(mtx); } vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa) { vm_page_t m; #ifdef VM_PHYSSEG_SPARSE m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) m = vm_phys_fictitious_to_vm_page(pa); return (m); #elif defined(VM_PHYSSEG_DENSE) long pi; pi = atop(pa); if (pi >= first_page && (pi - first_page) < vm_page_array_size) { m = &vm_page_array[pi - first_page]; return (m); } return (vm_phys_fictitious_to_vm_page(pa)); #else #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." #endif } /* * vm_page_getfake: * * Create a fictitious page with the specified physical address and * memory attribute. The memory attribute is the only the machine- * dependent aspect of a fictitious page that must be initialized. */ vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr) { vm_page_t m; m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO); vm_page_initfake(m, paddr, memattr); return (m); } void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { if ((m->flags & PG_FICTITIOUS) != 0) { /* * The page's memattr might have changed since the * previous initialization. Update the pmap to the * new memattr. */ goto memattr; } m->phys_addr = paddr; m->queue = PQ_NONE; /* Fictitious pages don't use "segind". */ m->flags = PG_FICTITIOUS; /* Fictitious pages don't use "order" or "pool". */ m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_SINGLE_EXCLUSIVER; m->wire_count = 1; pmap_page_init(m); memattr: pmap_page_set_memattr(m, memattr); } /* * vm_page_putfake: * * Release a fictitious page. */ void vm_page_putfake(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m)); KASSERT((m->flags & PG_FICTITIOUS) != 0, ("vm_page_putfake: bad page %p", m)); uma_zfree(fakepg_zone, m); } /* * vm_page_updatefake: * * Update the given fictitious page to the specified physical address and * memory attribute. */ void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { KASSERT((m->flags & PG_FICTITIOUS) != 0, ("vm_page_updatefake: bad page %p", m)); m->phys_addr = paddr; pmap_page_set_memattr(m, memattr); } /* * vm_page_free: * * Free a page. */ void vm_page_free(vm_page_t m) { m->flags &= ~PG_ZERO; vm_page_free_toq(m); } /* * vm_page_free_zero: * * Free a page to the zerod-pages queue */ void vm_page_free_zero(vm_page_t m) { m->flags |= PG_ZERO; vm_page_free_toq(m); } /* * Unbusy and handle the page queueing for a page from the VOP_GETPAGES() * array which is not the request page. */ void vm_page_readahead_finish(vm_page_t m) { if (m->valid != 0) { /* * Since the page is not the requested page, whether * it should be activated or deactivated is not * obvious. Empirical results have shown that * deactivating the page is usually the best choice, * unless the page is wanted by another thread. */ vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); vm_page_unlock(m); vm_page_xunbusy(m); } else { /* * Free the completely invalid page. Such page state * occurs due to the short read operation which did * not covered our page at all, or in case when a read * error happens. */ vm_page_lock(m); vm_page_free(m); vm_page_unlock(m); } } /* * vm_page_sleep_if_busy: * * Sleep and release the page queues lock if the page is busied. * Returns TRUE if the thread slept. * * The given page must be unlocked and object containing it must * be locked. 
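 *
 * A typical caller retries after a sleep because the page's identity may
 * have changed while the object lock was dropped (sketch only; the label
 * is hypothetical):
 *
 *	if (vm_page_sleep_if_busy(m, "pgbusy"))
 *		goto relookup;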
*/ int vm_page_sleep_if_busy(vm_page_t m, const char *msg) { vm_object_t obj; vm_page_lock_assert(m, MA_NOTOWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); if (vm_page_busied(m)) { /* * The page-specific object must be cached because page * identity can change during the sleep, causing the * re-lock of a different object. * It is assumed that a reference to the object is already * held by the callers. */ obj = m->object; vm_page_lock(m); VM_OBJECT_WUNLOCK(obj); vm_page_busy_sleep(m, msg); VM_OBJECT_WLOCK(obj); return (TRUE); } return (FALSE); } /* * vm_page_dirty_KBI: [ internal use only ] * * Set all bits in the page's dirty field. * * The object containing the specified page must be locked if the * call is made from the machine-independent layer. * * See vm_page_clear_dirty_mask(). * * This function should only be called by vm_page_dirty(). */ void vm_page_dirty_KBI(vm_page_t m) { /* These assertions refer to this operation by its public name. */ KASSERT((m->flags & PG_CACHED) == 0, ("vm_page_dirty: page in cache!")); KASSERT(m->valid == VM_PAGE_BITS_ALL, ("vm_page_dirty: page is invalid!")); m->dirty = VM_PAGE_BITS_ALL; } /* * vm_page_insert: [ internal use only ] * * Inserts the given mem entry into the object and object list. * * The object must be locked. */ int vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { vm_page_t mpred; VM_OBJECT_ASSERT_WLOCKED(object); mpred = vm_radix_lookup_le(&object->rtree, pindex); return (vm_page_insert_after(m, object, pindex, mpred)); } /* * vm_page_insert_after: * * Inserts the page "m" into the specified object at offset "pindex". * * The page "mpred" must immediately precede the offset "pindex" within * the specified object. * * The object must be locked. */ static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred) { vm_pindex_t sidx; vm_object_t sobj; vm_page_t msucc; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(m->object == NULL, ("vm_page_insert_after: page already inserted")); if (mpred != NULL) { KASSERT(mpred->object == object, ("vm_page_insert_after: object doesn't contain mpred")); KASSERT(mpred->pindex < pindex, ("vm_page_insert_after: mpred doesn't precede pindex")); msucc = TAILQ_NEXT(mpred, listq); } else msucc = TAILQ_FIRST(&object->memq); if (msucc != NULL) KASSERT(msucc->pindex > pindex, ("vm_page_insert_after: msucc doesn't succeed pindex")); /* * Record the object/offset pair in this page */ sobj = m->object; sidx = m->pindex; m->object = object; m->pindex = pindex; /* * Now link into the object's ordered list of backed pages. */ if (vm_radix_insert(&object->rtree, m)) { m->object = sobj; m->pindex = sidx; return (1); } vm_page_insert_radixdone(m, object, mpred); return (0); } /* * vm_page_insert_radixdone: * * Complete page "m" insertion into the specified object after the * radix trie hooking. * * The page "mpred" must precede the offset "m->pindex" within the * specified object. * * The object must be locked. 
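 *
 * Splitting insertion into "radix hook" and "finish" steps lets a caller
 * such as vm_page_rename() attempt the allocation-prone radix insertion
 * first and back out cleanly if it fails; only the infallible
 * bookkeeping is done here.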
*/ static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object != NULL && m->object == object, ("vm_page_insert_radixdone: page %p has inconsistent object", m)); if (mpred != NULL) { KASSERT(mpred->object == object, ("vm_page_insert_after: object doesn't contain mpred")); KASSERT(mpred->pindex < m->pindex, ("vm_page_insert_after: mpred doesn't precede pindex")); } if (mpred != NULL) TAILQ_INSERT_AFTER(&object->memq, mpred, m, listq); else TAILQ_INSERT_HEAD(&object->memq, m, listq); /* * Show that the object has one more resident page. */ object->resident_page_count++; /* * Hold the vnode until the last page is released. */ if (object->resident_page_count == 1 && object->type == OBJT_VNODE) vhold(object->handle); /* * Since we are inserting a new and possibly dirty page, * update the object's OBJ_MIGHTBEDIRTY flag. */ if (pmap_page_is_write_mapped(m)) vm_object_set_writeable_dirty(object); } /* * vm_page_remove: * * Removes the given mem entry from the object/offset-page * table and the object page list, but do not invalidate/terminate * the backing store. * * The object must be locked. The page must be locked if it is managed. */ void vm_page_remove(vm_page_t m) { vm_object_t object; boolean_t lockacq; if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_lock_assert(m, MA_OWNED); if ((object = m->object) == NULL) return; VM_OBJECT_ASSERT_WLOCKED(object); if (vm_page_xbusied(m)) { lockacq = FALSE; if ((m->oflags & VPO_UNMANAGED) != 0 && !mtx_owned(vm_page_lockptr(m))) { lockacq = TRUE; vm_page_lock(m); } vm_page_flash(m); atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); if (lockacq) vm_page_unlock(m); } /* * Now remove from the object's list of backed pages. */ vm_radix_remove(&object->rtree, m->pindex); TAILQ_REMOVE(&object->memq, m, listq); /* * And show that the object has one fewer resident page. */ object->resident_page_count--; /* * The vnode may now be recycled. */ if (object->resident_page_count == 0 && object->type == OBJT_VNODE) vdrop(object->handle); m->object = NULL; } /* * vm_page_lookup: * * Returns the page associated with the object/offset * pair specified; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_lookup(vm_object_t object, vm_pindex_t pindex) { VM_OBJECT_ASSERT_LOCKED(object); return (vm_radix_lookup(&object->rtree, pindex)); } /* * vm_page_find_least: * * Returns the page associated with the object with least pindex * greater than or equal to the parameter pindex, or NULL. * * The object must be locked. */ vm_page_t vm_page_find_least(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; VM_OBJECT_ASSERT_LOCKED(object); if ((m = TAILQ_FIRST(&object->memq)) != NULL && m->pindex < pindex) m = vm_radix_lookup_ge(&object->rtree, pindex); return (m); } /* * Returns the given page's successor (by pindex) within the object if it is * resident; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_next(vm_page_t m) { vm_page_t next; VM_OBJECT_ASSERT_WLOCKED(m->object); if ((next = TAILQ_NEXT(m, listq)) != NULL && next->pindex != m->pindex + 1) next = NULL; return (next); } /* * Returns the given page's predecessor (by pindex) within the object if it is * resident; if none is found, NULL is returned. * * The object must be locked. 
*/ vm_page_t vm_page_prev(vm_page_t m) { vm_page_t prev; VM_OBJECT_ASSERT_WLOCKED(m->object); if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL && prev->pindex != m->pindex - 1) prev = NULL; return (prev); } /* * Uses the page mnew as a replacement for an existing page at index * pindex which must be already present in the object. * * The existing page must not be on a paging queue. */ vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) { vm_page_t mold, mpred; VM_OBJECT_ASSERT_WLOCKED(object); /* * This function mostly follows vm_page_insert() and * vm_page_remove() without the radix, object count and vnode * dance. Double check such functions for more comments. */ mpred = vm_radix_lookup(&object->rtree, pindex); KASSERT(mpred != NULL, ("vm_page_replace: replacing page not present with pindex")); mpred = TAILQ_PREV(mpred, respgs, listq); if (mpred != NULL) KASSERT(mpred->pindex < pindex, ("vm_page_insert_after: mpred doesn't precede pindex")); mnew->object = object; mnew->pindex = pindex; mold = vm_radix_replace(&object->rtree, mnew); KASSERT(mold->queue == PQ_NONE, ("vm_page_replace: mold is on a paging queue")); /* Detach the old page from the resident tailq. */ TAILQ_REMOVE(&object->memq, mold, listq); mold->object = NULL; vm_page_xunbusy(mold); /* Insert the new page in the resident tailq. */ if (mpred != NULL) TAILQ_INSERT_AFTER(&object->memq, mpred, mnew, listq); else TAILQ_INSERT_HEAD(&object->memq, mnew, listq); if (pmap_page_is_write_mapped(mnew)) vm_object_set_writeable_dirty(object); return (mold); } /* * vm_page_rename: * * Move the given memory entry from its * current object to the specified target object/offset. * * Note: swap associated with the page must be invalidated by the move. We * have to do this for several reasons: (1) we aren't freeing the * page, (2) we are dirtying the page, (3) the VM system is probably * moving the page from object A to B, and will then later move * the backing store from A to B and we can't have a conflict. * * Note: we *always* dirty the page. It is necessary both for the * fact that we moved it, and because we may be invalidating * swap. If the page is on the cache, we have to deactivate it * or vm_page_dirty() will panic. Dirty pages are not allowed * on the cache. * * The objects must be locked. */ int vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) { vm_page_t mpred; vm_pindex_t opidx; VM_OBJECT_ASSERT_WLOCKED(new_object); mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex); KASSERT(mpred == NULL || mpred->pindex != new_pindex, ("vm_page_rename: pindex already renamed")); /* * Create a custom version of vm_page_insert() which does not depend * by m_prev and can cheat on the implementation aspects of the * function. */ opidx = m->pindex; m->pindex = new_pindex; if (vm_radix_insert(&new_object->rtree, m)) { m->pindex = opidx; return (1); } /* * The operation cannot fail anymore. The removal must happen before * the listq iterator is tainted. */ m->pindex = opidx; vm_page_lock(m); vm_page_remove(m); /* Return back to the new pindex to complete vm_page_insert(). */ m->pindex = new_pindex; m->object = new_object; vm_page_unlock(m); vm_page_insert_radixdone(m, new_object, mpred); vm_page_dirty(m); return (0); } /* * Convert all of the given object's cached pages that have a * pindex within the given range into free pages. If the value * zero is given for "end", then the range's upper bound is * infinity. 
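 * (Callers therefore pass end == 0 to flush an object's entire cache.)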
If the given object is backed by a vnode and it * transitions from having one or more cached pages to none, the * vnode's hold count is reduced. */ void vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { vm_page_t m; boolean_t empty; mtx_lock(&vm_page_queue_free_mtx); if (__predict_false(vm_radix_is_empty(&object->cache))) { mtx_unlock(&vm_page_queue_free_mtx); return; } while ((m = vm_radix_lookup_ge(&object->cache, start)) != NULL) { if (end != 0 && m->pindex >= end) break; vm_radix_remove(&object->cache, m->pindex); vm_page_cache_turn_free(m); } empty = vm_radix_is_empty(&object->cache); mtx_unlock(&vm_page_queue_free_mtx); if (object->type == OBJT_VNODE && empty) vdrop(object->handle); } /* * Returns the cached page that is associated with the given * object and offset. If, however, none exists, returns NULL. * * The free page queue must be locked. */ static inline vm_page_t vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); return (vm_radix_lookup(&object->cache, pindex)); } /* * Remove the given cached page from its containing object's * collection of cached pages. * * The free page queue must be locked. */ static void vm_page_cache_remove(vm_page_t m) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); KASSERT((m->flags & PG_CACHED) != 0, ("vm_page_cache_remove: page %p is not cached", m)); vm_radix_remove(&m->object->cache, m->pindex); m->object = NULL; vm_cnt.v_cache_count--; } /* * Transfer all of the cached pages with offset greater than or * equal to 'offidxstart' from the original object's cache to the * new object's cache. However, any cached pages with offset * greater than or equal to the new object's size are kept in the * original object. Initially, the new object's cache must be * empty. Offset 'offidxstart' in the original object must * correspond to offset zero in the new object. * * The new object must be locked. */ void vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart, vm_object_t new_object) { vm_page_t m; /* * Insertion into an object's collection of cached pages * requires the object to be locked. In contrast, removal does * not. */ VM_OBJECT_ASSERT_WLOCKED(new_object); KASSERT(vm_radix_is_empty(&new_object->cache), ("vm_page_cache_transfer: object %p has cached pages", new_object)); mtx_lock(&vm_page_queue_free_mtx); while ((m = vm_radix_lookup_ge(&orig_object->cache, offidxstart)) != NULL) { /* * Transfer all of the pages with offset greater than or * equal to 'offidxstart' from the original object's * cache to the new object's cache. */ if ((m->pindex - offidxstart) >= new_object->size) break; vm_radix_remove(&orig_object->cache, m->pindex); /* Update the page's object and offset. */ m->object = new_object; m->pindex -= offidxstart; if (vm_radix_insert(&new_object->cache, m)) vm_page_cache_turn_free(m); } mtx_unlock(&vm_page_queue_free_mtx); } /* * Returns TRUE if a cached page is associated with the given object and * offset, and FALSE otherwise. * * The object must be locked. */ boolean_t vm_page_is_cached(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; /* * Insertion into an object's collection of cached pages requires the * object to be locked. Therefore, if the object is locked and the * object's collection is empty, there is no need to acquire the free * page queues lock in order to prove that the specified page doesn't * exist. 
*/ VM_OBJECT_ASSERT_WLOCKED(object); if (__predict_true(vm_object_cache_is_empty(object))) return (FALSE); mtx_lock(&vm_page_queue_free_mtx); m = vm_page_cache_lookup(object, pindex); mtx_unlock(&vm_page_queue_free_mtx); return (m != NULL); } /* * vm_page_alloc: * * Allocate and return a page that is associated with the specified * object and offset pair. By default, this page is exclusive busied. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_COUNT(number) the number of additional pages that the caller * intends to allocate * VM_ALLOC_IFCACHED return page only if it is cached * VM_ALLOC_IFNOTCACHED return NULL, do not reactivate if the page * is cached * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NODUMP do not include the page in a kernel core dump * VM_ALLOC_NOOBJ page is not associated with an object and * should not be exclusive busy * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) { struct vnode *vp = NULL; vm_object_t m_object; vm_page_t m, mpred; int flags, req_class; mpred = 0; /* XXX: pacify gcc */ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object, req)); if (object != NULL) VM_OBJECT_ASSERT_WLOCKED(object); req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; if (object != NULL) { mpred = vm_radix_lookup_le(&object->rtree, pindex); KASSERT(mpred == NULL || mpred->pindex != pindex, ("vm_page_alloc: pindex already allocated")); } /* * The page allocation request can came from consumers which already * hold the free page queue mutex, like vm_page_insert() in * vm_page_cache(). */ mtx_lock_flags(&vm_page_queue_free_mtx, MTX_RECURSE); if (vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count + vm_cnt.v_cache_count > 0)) { /* * Allocate from the free queue if the number of free pages * exceeds the minimum for the request class. */ if (object != NULL && (m = vm_page_cache_lookup(object, pindex)) != NULL) { if ((req & VM_ALLOC_IFNOTCACHED) != 0) { mtx_unlock(&vm_page_queue_free_mtx); return (NULL); } if (vm_phys_unfree_page(m)) vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, 0); #if VM_NRESERVLEVEL > 0 else if (!vm_reserv_reactivate_page(m)) #else else #endif panic("vm_page_alloc: cache page %p is missing" " from the free queue", m); } else if ((req & VM_ALLOC_IFCACHED) != 0) { mtx_unlock(&vm_page_queue_free_mtx); return (NULL); #if VM_NRESERVLEVEL > 0 } else if (object == NULL || (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) != OBJ_COLORED || (m = vm_reserv_alloc_page(object, pindex, mpred)) == NULL) { #else } else { #endif m = vm_phys_alloc_pages(object != NULL ? 
VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); #if VM_NRESERVLEVEL > 0 if (m == NULL && vm_reserv_reclaim_inactive()) { m = vm_phys_alloc_pages(object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); } #endif } } else { /* * Not allocatable, give up. */ mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); pagedaemon_wakeup(); return (NULL); } /* * At this point we had better have found a good page. */ KASSERT(m != NULL, ("vm_page_alloc: missing page")); KASSERT(m->queue == PQ_NONE, ("vm_page_alloc: page %p has unexpected queue %d", m, m->queue)); KASSERT(m->wire_count == 0, ("vm_page_alloc: page %p is wired", m)); KASSERT(m->hold_count == 0, ("vm_page_alloc: page %p is held", m)); KASSERT(!vm_page_sbusied(m), ("vm_page_alloc: page %p is busy", m)); KASSERT(m->dirty == 0, ("vm_page_alloc: page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("vm_page_alloc: page %p has unexpected memattr %d", m, pmap_page_get_memattr(m))); if ((m->flags & PG_CACHED) != 0) { KASSERT((m->flags & PG_ZERO) == 0, ("vm_page_alloc: cached page %p is PG_ZERO", m)); KASSERT(m->valid != 0, ("vm_page_alloc: cached page %p is invalid", m)); if (m->object == object && m->pindex == pindex) vm_cnt.v_reactivated++; else m->valid = 0; m_object = m->object; vm_page_cache_remove(m); if (m_object->type == OBJT_VNODE && vm_object_cache_is_empty(m_object)) vp = m_object->handle; } else { KASSERT(m->valid == 0, ("vm_page_alloc: free page %p is valid", m)); vm_phys_freecnt_adj(m, -1); if ((m->flags & PG_ZERO) != 0) vm_page_zero_count--; } mtx_unlock(&vm_page_queue_free_mtx); /* * Initialize the page. Only the PG_ZERO flag is inherited. */ flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; flags &= m->flags; if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; m->flags = flags; m->aflags = 0; m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0; m->busy_lock = VPB_UNBUSIED; if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0) m->busy_lock = VPB_SINGLE_EXCLUSIVER; if ((req & VM_ALLOC_SBUSY) != 0) m->busy_lock = VPB_SHARERS_WORD(1); if (req & VM_ALLOC_WIRED) { /* * The page lock is not required for wiring a page until that * page is inserted into the object. */ atomic_add_int(&vm_cnt.v_wire_count, 1); m->wire_count = 1; } m->act_count = 0; if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { /* See the comment below about hold count. */ if (vp != NULL) vdrop(vp); pagedaemon_wakeup(); if (req & VM_ALLOC_WIRED) { atomic_subtract_int(&vm_cnt.v_wire_count, 1); m->wire_count = 0; } m->object = NULL; m->oflags = VPO_UNMANAGED; vm_page_free(m); return (NULL); } /* Ignore device objects; the pager sets "memattr" for them. */ if (object->memattr != VM_MEMATTR_DEFAULT && (object->flags & OBJ_FICTITIOUS) == 0) pmap_page_set_memattr(m, object->memattr); } else m->pindex = pindex; /* * The following call to vdrop() must come after the above call * to vm_page_insert() in case both affect the same object and * vnode. Otherwise, the affected vnode's hold count could * temporarily become zero. */ if (vp != NULL) vdrop(vp); /* * Don't wakeup too often - wakeup the pageout daemon when * we would be nearly out of memory. 
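 * (vm_paging_needed() compares the free and cached page counts against
 * the pageout wakeup threshold, so this only fires when memory is
 * actually scarce.)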
*/ if (vm_paging_needed()) pagedaemon_wakeup(); return (m); } static void vm_page_alloc_contig_vdrop(struct spglist *lst) { while (!SLIST_EMPTY(lst)) { vdrop((struct vnode *)SLIST_FIRST(lst)-> plinks.s.pv); SLIST_REMOVE_HEAD(lst, plinks.s.ss); } } /* * vm_page_alloc_contig: * * Allocate a contiguous set of physical pages of the given size "npages" * from the free lists. All of the physical pages must be at or above * the given physical address "low" and below the given physical address * "high". The given value "alignment" determines the alignment of the * first physical page in the set. If the given value "boundary" is * non-zero, then the set of physical pages cannot cross any physical * address boundary that is a multiple of that value. Both "alignment" * and "boundary" must be a power of two. * * If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT, * then the memory attribute setting for the physical pages is configured * to the object's memory attribute setting. Otherwise, the memory * attribute setting for the physical pages is configured to "memattr", * overriding the object's memory attribute setting. However, if the * object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the * memory attribute setting for the physical pages cannot be configured * to VM_MEMATTR_DEFAULT. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NODUMP do not include the page in a kernel core dump * VM_ALLOC_NOOBJ page is not associated with an object and * should not be exclusive busy * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { struct vnode *drop; struct spglist deferred_vdrop_list; vm_page_t m, m_tmp, m_ret; u_int flags; int req_class; KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object, req)); if (object != NULL) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_PHYS, ("vm_page_alloc_contig: object %p isn't OBJT_PHYS", object)); } KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero")); req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. 
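 * (The reclassification is visible in the checks below: VM_ALLOC_SYSTEM
 * requests may dip into the reserve down to v_interrupt_free_min pages,
 * and VM_ALLOC_INTERRUPT requests may consume every remaining free
 * page.)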
*/ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; SLIST_INIT(&deferred_vdrop_list); mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages + vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages + vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages)) { #if VM_NRESERVLEVEL > 0 retry: if (object == NULL || (object->flags & OBJ_COLORED) == 0 || (m_ret = vm_reserv_alloc_contig(object, pindex, npages, low, high, alignment, boundary)) == NULL) #endif m_ret = vm_phys_alloc_contig(npages, low, high, alignment, boundary); } else { mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, npages); pagedaemon_wakeup(); return (NULL); } if (m_ret != NULL) for (m = m_ret; m < &m_ret[npages]; m++) { drop = vm_page_alloc_init(m); if (drop != NULL) { /* * Enqueue the vnode for deferred vdrop(). */ m->plinks.s.pv = drop; SLIST_INSERT_HEAD(&deferred_vdrop_list, m, plinks.s.ss); } } else { #if VM_NRESERVLEVEL > 0 if (vm_reserv_reclaim_contig(npages, low, high, alignment, boundary)) goto retry; #endif } mtx_unlock(&vm_page_queue_free_mtx); if (m_ret == NULL) return (NULL); /* * Initialize the pages. Only the PG_ZERO flag is inherited. */ flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; if ((req & VM_ALLOC_WIRED) != 0) atomic_add_int(&vm_cnt.v_wire_count, npages); if (object != NULL) { if (object->memattr != VM_MEMATTR_DEFAULT && memattr == VM_MEMATTR_DEFAULT) memattr = object->memattr; } for (m = m_ret; m < &m_ret[npages]; m++) { m->aflags = 0; m->flags = (m->flags | PG_NODUMP) & flags; m->busy_lock = VPB_UNBUSIED; if (object != NULL) { if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) m->busy_lock = VPB_SINGLE_EXCLUSIVER; if ((req & VM_ALLOC_SBUSY) != 0) m->busy_lock = VPB_SHARERS_WORD(1); } if ((req & VM_ALLOC_WIRED) != 0) m->wire_count = 1; /* Unmanaged pages don't use "act_count". */ m->oflags = VPO_UNMANAGED; if (object != NULL) { if (vm_page_insert(m, object, pindex)) { vm_page_alloc_contig_vdrop( &deferred_vdrop_list); if (vm_paging_needed()) pagedaemon_wakeup(); if ((req & VM_ALLOC_WIRED) != 0) atomic_subtract_int(&vm_cnt.v_wire_count, npages); for (m_tmp = m, m = m_ret; m < &m_ret[npages]; m++) { if ((req & VM_ALLOC_WIRED) != 0) m->wire_count = 0; if (m >= m_tmp) m->object = NULL; vm_page_free(m); } return (NULL); } } else m->pindex = pindex; if (memattr != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, memattr); pindex++; } vm_page_alloc_contig_vdrop(&deferred_vdrop_list); if (vm_paging_needed()) pagedaemon_wakeup(); return (m_ret); } /* * Initialize a page that has been freshly dequeued from a freelist. * The caller has to drop the vnode returned, if it is not NULL. * * This function may only be used to initialize unmanaged pages. * * To be called with vm_page_queue_free_mtx held. 
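 *
 * A minimal caller sketch (hypothetical, but it mirrors the pattern
 * used by the real callers in this file, vm_page_alloc_contig() above
 * and vm_page_alloc_freelist() below):
 *
 *	mtx_lock(&vm_page_queue_free_mtx);
 *	m = vm_phys_alloc_pages(VM_FREEPOOL_DIRECT, 0);
 *	drop = (m != NULL) ? vm_page_alloc_init(m) : NULL;
 *	mtx_unlock(&vm_page_queue_free_mtx);
 *	if (drop != NULL)
 *		vdrop(drop);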
*/ static struct vnode * vm_page_alloc_init(vm_page_t m) { struct vnode *drop; vm_object_t m_object; KASSERT(m->queue == PQ_NONE, ("vm_page_alloc_init: page %p has unexpected queue %d", m, m->queue)); KASSERT(m->wire_count == 0, ("vm_page_alloc_init: page %p is wired", m)); KASSERT(m->hold_count == 0, ("vm_page_alloc_init: page %p is held", m)); KASSERT(!vm_page_sbusied(m), ("vm_page_alloc_init: page %p is busy", m)); KASSERT(m->dirty == 0, ("vm_page_alloc_init: page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("vm_page_alloc_init: page %p has unexpected memattr %d", m, pmap_page_get_memattr(m))); mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); drop = NULL; if ((m->flags & PG_CACHED) != 0) { KASSERT((m->flags & PG_ZERO) == 0, ("vm_page_alloc_init: cached page %p is PG_ZERO", m)); m->valid = 0; m_object = m->object; vm_page_cache_remove(m); if (m_object->type == OBJT_VNODE && vm_object_cache_is_empty(m_object)) drop = m_object->handle; } else { KASSERT(m->valid == 0, ("vm_page_alloc_init: free page %p is valid", m)); vm_phys_freecnt_adj(m, -1); if ((m->flags & PG_ZERO) != 0) vm_page_zero_count--; } return (drop); } /* * vm_page_alloc_freelist: * * Allocate a physical page from the specified free page list. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_COUNT(number) the number of additional pages that the caller * intends to allocate * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc_freelist(int flind, int req) { struct vnode *drop; vm_page_t m; u_int flags; int req_class; req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; /* * Do not allocate reserved pages unless the req has asked for it. */ mtx_lock_flags(&vm_page_queue_free_mtx, MTX_RECURSE); if (vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count + vm_cnt.v_cache_count > 0)) m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0); else { mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); pagedaemon_wakeup(); return (NULL); } if (m == NULL) { mtx_unlock(&vm_page_queue_free_mtx); return (NULL); } drop = vm_page_alloc_init(m); mtx_unlock(&vm_page_queue_free_mtx); /* * Initialize the page. Only the PG_ZERO flag is inherited. */ m->aflags = 0; flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; m->flags &= flags; if ((req & VM_ALLOC_WIRED) != 0) { /* * The page lock is not required for wiring a page that does * not belong to an object. */ atomic_add_int(&vm_cnt.v_wire_count, 1); m->wire_count = 1; } /* Unmanaged pages don't use "act_count". */ m->oflags = VPO_UNMANAGED; if (drop != NULL) vdrop(drop); if (vm_paging_needed()) pagedaemon_wakeup(); return (m); } /* * vm_wait: (also see VM_WAIT macro) * * Sleep until free pages are available for allocation. * - Called in various places before memory allocations. 
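 *
 * Typical use is an allocate-then-wait loop (a hypothetical sketch;
 * vm_page_grab() below uses the same pattern):
 *
 *	while ((m = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL)) ==
 *	    NULL) {
 *		VM_OBJECT_WUNLOCK(obj);
 *		VM_WAIT;
 *		VM_OBJECT_WLOCK(obj);
 *	}
 *
 * The object lock is dropped around VM_WAIT because the sleep can be
 * arbitrarily long.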
*/ void vm_wait(void) { mtx_lock(&vm_page_queue_free_mtx); if (curproc == pageproc) { vm_pageout_pages_needed = 1; msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx, PDROP | PSWP, "VMWait", 0); } else { if (!vm_pages_needed) { vm_pages_needed = 1; wakeup(&vm_pages_needed); } msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PVM, "vmwait", 0); } } /* * vm_waitpfault: (also see VM_WAITPFAULT macro) * * Sleep until free pages are available for allocation. * - Called only in vm_fault so that processes page faulting * can be easily tracked. * - Sleeps at a lower priority than vm_wait() so that vm_wait()ing * processes will be able to grab memory first. Do not change * this balance without careful testing first. */ void vm_waitpfault(void) { mtx_lock(&vm_page_queue_free_mtx); if (!vm_pages_needed) { vm_pages_needed = 1; wakeup(&vm_pages_needed); } msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PUSER, "pfault", 0); } struct vm_pagequeue * vm_page_pagequeue(vm_page_t m) { return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]); } /* * vm_page_dequeue: * * Remove the given page from its current page queue. * * The page must be locked. */ void vm_page_dequeue(vm_page_t m) { struct vm_pagequeue *pq; vm_page_assert_locked(m); KASSERT(m->queue < PQ_COUNT, ("vm_page_dequeue: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_lock(pq); m->queue = PQ_NONE; TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_dec(pq); vm_pagequeue_unlock(pq); } /* * vm_page_dequeue_locked: * * Remove the given page from its current page queue. * * The page and page queue must be locked. */ void vm_page_dequeue_locked(vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); pq = vm_page_pagequeue(m); vm_pagequeue_assert_locked(pq); m->queue = PQ_NONE; TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_dec(pq); } /* * vm_page_enqueue: * * Add the given page to the specified page queue. * * The page must be locked. */ static void vm_page_enqueue(uint8_t queue, vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); KASSERT(queue < PQ_COUNT, ("vm_page_enqueue: invalid queue %u request for page %p", queue, m)); pq = &vm_phys_domain(m)->vmd_pagequeues[queue]; vm_pagequeue_lock(pq); m->queue = queue; TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_inc(pq); vm_pagequeue_unlock(pq); } /* * vm_page_requeue: * * Move the given page to the tail of its current page queue. * * The page must be locked. */ void vm_page_requeue(vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); KASSERT(m->queue != PQ_NONE, ("vm_page_requeue: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_lock(pq); TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_unlock(pq); } /* * vm_page_requeue_locked: * * Move the given page to the tail of its current page queue. * * The page queue must be locked. */ void vm_page_requeue_locked(vm_page_t m) { struct vm_pagequeue *pq; KASSERT(m->queue != PQ_NONE, ("vm_page_requeue_locked: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_assert_locked(pq); TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); } /* * vm_page_activate: * * Put the specified page on the active list (if appropriate). * Ensure that act_count is at least ACT_INIT but do not otherwise * mess with it. * * The page must be locked. 
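 *
 * A minimal caller sketch (hypothetical):
 *
 *	vm_page_lock(m);
 *	vm_page_activate(m);
 *	vm_page_unlock(m);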
*/ void vm_page_activate(vm_page_t m) { int queue; vm_page_lock_assert(m, MA_OWNED); if ((queue = m->queue) != PQ_ACTIVE) { if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; if (queue != PQ_NONE) vm_page_dequeue(m); vm_page_enqueue(PQ_ACTIVE, m); } else KASSERT(queue == PQ_NONE, ("vm_page_activate: wired page %p is queued", m)); } else { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; } } /* * vm_page_free_wakeup: * * Helper routine for vm_page_free_toq() and vm_page_cache(). This * routine is called when a page has been added to the cache or free * queues. * * The page queues must be locked. */ static inline void vm_page_free_wakeup(void) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); /* * if pageout daemon needs pages, then tell it that there are * some free. */ if (vm_pageout_pages_needed && vm_cnt.v_cache_count + vm_cnt.v_free_count >= vm_cnt.v_pageout_free_min) { wakeup(&vm_pageout_pages_needed); vm_pageout_pages_needed = 0; } /* * wakeup processes that are waiting on memory if we hit a * high water mark. And wakeup scheduler process if we have * lots of memory. this process will swapin processes. */ if (vm_pages_needed && !vm_page_count_min()) { vm_pages_needed = 0; wakeup(&vm_cnt.v_free_count); } } /* * Turn a cached page into a free page, by changing its attributes. * Keep the statistics up-to-date. * * The free page queue must be locked. */ static void vm_page_cache_turn_free(vm_page_t m) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); m->object = NULL; m->valid = 0; KASSERT((m->flags & PG_CACHED) != 0, ("vm_page_cache_turn_free: page %p is not cached", m)); m->flags &= ~PG_CACHED; vm_cnt.v_cache_count--; vm_phys_freecnt_adj(m, 1); } /* * vm_page_free_toq: * * Returns the given page to the free list, * disassociating it with any VM object. * * The object must be locked. The page must be locked if it is managed. */ void vm_page_free_toq(vm_page_t m) { if ((m->oflags & VPO_UNMANAGED) == 0) { vm_page_lock_assert(m, MA_OWNED); KASSERT(!pmap_page_is_mapped(m), ("vm_page_free_toq: freeing mapped page %p", m)); } else KASSERT(m->queue == PQ_NONE, ("vm_page_free_toq: unmanaged page %p is queued", m)); PCPU_INC(cnt.v_tfree); if (vm_page_sbusied(m)) panic("vm_page_free: freeing busy page %p", m); /* * Unqueue, then remove page. Note that we cannot destroy * the page here because we do not want to call the pager's * callback routine until after we've put the page on the * appropriate free queue. */ vm_page_remque(m); vm_page_remove(m); /* * If fictitious remove object association and * return, otherwise delay object association removal. */ if ((m->flags & PG_FICTITIOUS) != 0) { return; } m->valid = 0; vm_page_undirty(m); if (m->wire_count != 0) panic("vm_page_free: freeing wired page %p", m); if (m->hold_count != 0) { m->flags &= ~PG_ZERO; KASSERT((m->flags & PG_UNHOLDFREE) == 0, ("vm_page_free: freeing PG_UNHOLDFREE page %p", m)); m->flags |= PG_UNHOLDFREE; } else { /* * Restore the default memory attribute to the page. */ if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); /* * Insert the page into the physical memory allocator's * cache/free page queues. 
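 * A PG_ZERO page additionally bumps vm_page_zero_count; otherwise
 * vm_page_zero_idle_wakeup() nudges the idle zeroing machinery, as
 * the code below shows.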
*/ mtx_lock(&vm_page_queue_free_mtx); vm_phys_freecnt_adj(m, 1); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) #else if (TRUE) #endif vm_phys_free_pages(m, 0); if ((m->flags & PG_ZERO) != 0) ++vm_page_zero_count; else vm_page_zero_idle_wakeup(); vm_page_free_wakeup(); mtx_unlock(&vm_page_queue_free_mtx); } } /* * vm_page_wire: * * Mark this page as wired down by yet * another map, removing it from paging queues * as necessary. * * If the page is fictitious, then its wire count must remain one. * * The page must be locked. */ void vm_page_wire(vm_page_t m) { /* * Only bump the wire statistics if the page is not already wired, * and only unqueue the page if it is on some queue (if it is unmanaged * it is already off the queues). */ vm_page_lock_assert(m, MA_OWNED); if ((m->flags & PG_FICTITIOUS) != 0) { KASSERT(m->wire_count == 1, ("vm_page_wire: fictitious page %p's wire count isn't one", m)); return; } if (m->wire_count == 0) { KASSERT((m->oflags & VPO_UNMANAGED) == 0 || m->queue == PQ_NONE, ("vm_page_wire: unmanaged page %p is queued", m)); vm_page_remque(m); atomic_add_int(&vm_cnt.v_wire_count, 1); } m->wire_count++; KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m)); } /* * vm_page_unwire: * - * Release one wiring of the specified page, potentially enabling it to be - * paged again. If paging is enabled, then the value of the parameter - * "queue" determines the queue to which the page is added. + * Release one wiring of the specified page, potentially allowing it to be + * paged out. Returns TRUE if the number of wirings transitions to zero and + * FALSE otherwise. * - * However, unless the page belongs to an object, it is not enqueued because - * it cannot be paged out. + * Only managed pages belonging to an object can be paged out. If the number + * of wirings transitions to zero and the page is eligible for page out, then + * the page is added to the specified paging queue (unless PQ_NONE is + * specified). * * If a page is fictitious, then its wire count must always be one. * * A managed page must be locked. */ -void +boolean_t vm_page_unwire(vm_page_t m, uint8_t queue) { - KASSERT(queue < PQ_COUNT, + KASSERT(queue < PQ_COUNT || queue == PQ_NONE, ("vm_page_unwire: invalid queue %u request for page %p", queue, m)); if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_lock_assert(m, MA_OWNED); + vm_page_assert_locked(m); if ((m->flags & PG_FICTITIOUS) != 0) { KASSERT(m->wire_count == 1, ("vm_page_unwire: fictitious page %p's wire count isn't one", m)); - return; + return (FALSE); } if (m->wire_count > 0) { m->wire_count--; if (m->wire_count == 0) { atomic_subtract_int(&vm_cnt.v_wire_count, 1); - if ((m->oflags & VPO_UNMANAGED) != 0 || - m->object == NULL) - return; - if (queue == PQ_INACTIVE) - m->flags &= ~PG_WINATCFLS; - vm_page_enqueue(queue, m); - } + if ((m->oflags & VPO_UNMANAGED) == 0 && + m->object != NULL && queue != PQ_NONE) { + if (queue == PQ_INACTIVE) + m->flags &= ~PG_WINATCFLS; + vm_page_enqueue(queue, m); + } + return (TRUE); + } else + return (FALSE); } else panic("vm_page_unwire: page %p's wire count is zero", m); } /* * Move the specified page to the inactive queue. * * Many pages placed on the inactive queue should actually go * into the cache, but it is difficult to figure out which. What * we do instead, if the inactive target is well met, is to put * clean pages at the head of the inactive queue instead of the tail. * This will cause them to be moved to the cache more quickly and * if not actively re-referenced, reclaimed more quickly. 
If we just * stick these pages at the end of the inactive queue, heavy filesystem * meta-data accesses can cause an unnecessary paging load on memory bound * processes. This optimization causes one-time-use metadata to be * reused more quickly. * * Normally athead is 0 resulting in LRU operation. athead is set * to 1 if we want this page to be 'as if it were placed in the cache', * except without unmapping it from the process address space. * * The page must be locked. */ static inline void _vm_page_deactivate(vm_page_t m, int athead) { struct vm_pagequeue *pq; int queue; vm_page_assert_locked(m); /* * Ignore if the page is already inactive, unless it is unlikely to be * reactivated. */ if ((queue = m->queue) == PQ_INACTIVE && !athead) return; if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE]; /* Avoid multiple acquisitions of the inactive queue lock. */ if (queue == PQ_INACTIVE) { vm_pagequeue_lock(pq); vm_page_dequeue_locked(m); } else { if (queue != PQ_NONE) vm_page_dequeue(m); m->flags &= ~PG_WINATCFLS; vm_pagequeue_lock(pq); } m->queue = PQ_INACTIVE; if (athead) TAILQ_INSERT_HEAD(&pq->pq_pl, m, plinks.q); else TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_inc(pq); vm_pagequeue_unlock(pq); } } /* * Move the specified page to the inactive queue. * * The page must be locked. */ void vm_page_deactivate(vm_page_t m) { _vm_page_deactivate(m, 0); } /* * vm_page_try_to_cache: * * Returns 0 on failure, 1 on success */ int vm_page_try_to_cache(vm_page_t m) { vm_page_lock_assert(m, MA_OWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); if (m->dirty || m->hold_count || m->wire_count || (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m)) return (0); pmap_remove_all(m); if (m->dirty) return (0); vm_page_cache(m); return (1); } /* * vm_page_try_to_free() * * Attempt to free the page. If we cannot free it, we do nothing. * 1 is returned on success, 0 on failure. */ int vm_page_try_to_free(vm_page_t m) { vm_page_lock_assert(m, MA_OWNED); if (m->object != NULL) VM_OBJECT_ASSERT_WLOCKED(m->object); if (m->dirty || m->hold_count || m->wire_count || (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m)) return (0); pmap_remove_all(m); if (m->dirty) return (0); vm_page_free(m); return (1); } /* * vm_page_cache * * Put the specified page onto the page cache queue (if appropriate). * * The object and page must be locked. */ void vm_page_cache(vm_page_t m) { vm_object_t object; boolean_t cache_was_empty; vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); if (vm_page_busied(m) || (m->oflags & VPO_UNMANAGED) || m->hold_count || m->wire_count) panic("vm_page_cache: attempting to cache busy page"); KASSERT(!pmap_page_is_mapped(m), ("vm_page_cache: page %p is mapped", m)); KASSERT(m->dirty == 0, ("vm_page_cache: page %p is dirty", m)); if (m->valid == 0 || object->type == OBJT_DEFAULT || (object->type == OBJT_SWAP && !vm_pager_has_page(object, m->pindex, NULL, NULL))) { /* * Hypothesis: A cache-eligible page belonging to a * default object or swap object but without a backing * store must be zero filled. */ vm_page_free(m); return; } KASSERT((m->flags & PG_CACHED) == 0, ("vm_page_cache: page %p is already cached", m)); /* * Remove the page from the paging queues. */ vm_page_remque(m); /* * Remove the page from the object's collection of resident * pages. 
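 * (The page leaves the object's resident trie and list here; further
 * below it is inserted into the object's separate cache trie.)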
*/ vm_radix_remove(&object->rtree, m->pindex); TAILQ_REMOVE(&object->memq, m, listq); object->resident_page_count--; /* * Restore the default memory attribute to the page. */ if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); /* * Insert the page into the object's collection of cached pages * and the physical memory allocator's cache/free page queues. */ m->flags &= ~PG_ZERO; mtx_lock(&vm_page_queue_free_mtx); cache_was_empty = vm_radix_is_empty(&object->cache); if (vm_radix_insert(&object->cache, m)) { mtx_unlock(&vm_page_queue_free_mtx); if (object->resident_page_count == 0) vdrop(object->handle); m->object = NULL; vm_page_free(m); return; } /* * The above call to vm_radix_insert() could reclaim the one pre- * existing cached page from this object, resulting in a call to * vdrop(). */ if (!cache_was_empty) cache_was_empty = vm_radix_is_singleton(&object->cache); m->flags |= PG_CACHED; vm_cnt.v_cache_count++; PCPU_INC(cnt.v_tcached); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) { #else if (TRUE) { #endif vm_phys_free_pages(m, 0); } vm_page_free_wakeup(); mtx_unlock(&vm_page_queue_free_mtx); /* * Increment the vnode's hold count if this is the object's only * cached page. Decrement the vnode's hold count if this was * the object's only resident page. */ if (object->type == OBJT_VNODE) { if (cache_was_empty && object->resident_page_count != 0) vhold(object->handle); else if (!cache_was_empty && object->resident_page_count == 0) vdrop(object->handle); } } /* * vm_page_advise * * Deactivate or do nothing, as appropriate. This routine is used * by madvise() and vop_stdadvise(). * * The object and page must be locked. */ void vm_page_advise(vm_page_t m, int advice) { vm_page_assert_locked(m); VM_OBJECT_ASSERT_WLOCKED(m->object); if (advice == MADV_FREE) /* * Mark the page clean. This will allow the page to be freed * up by the system. However, such pages are often reused * quickly by malloc() so we do not do anything that would * cause a page fault if we can help it. * * Specifically, we do not try to actually free the page now * nor do we try to put it in the cache (which would cause a * page fault on reuse). * * But we do make the page as freeable as we can without * actually taking the step of unmapping it. */ m->dirty = 0; else if (advice != MADV_DONTNEED) return; /* * Clear any references to the page. Otherwise, the page daemon will * immediately reactivate the page. */ vm_page_aflag_clear(m, PGA_REFERENCED); if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m)) vm_page_dirty(m); /* * Place clean pages at the head of the inactive queue rather than the * tail, thus defeating the queue's LRU operation and ensuring that the * page will be reused quickly. */ _vm_page_deactivate(m, m->dirty == 0); } /* * Grab a page, waiting until we are woken up due to the page * changing state. We keep on waiting if the page continues * to be in the object. If the page doesn't exist, first allocate it * and then conditionally zero it. * * This routine may sleep. * * The object must be locked on entry. The lock will, however, be released * and reacquired if the routine sleeps.
*/ vm_page_t vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) { vm_page_t m; int sleep; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || (allocflags & VM_ALLOC_IGN_SBUSY) != 0, ("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch")); retrylookup: if ((m = vm_page_lookup(object, pindex)) != NULL) { sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ? vm_page_xbusied(m) : vm_page_busied(m); if (sleep) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (NULL); /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(m, PGA_REFERENCED); vm_page_lock(m); VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(m, "pgrbwt"); VM_OBJECT_WLOCK(object); goto retrylookup; } else { if ((allocflags & VM_ALLOC_WIRED) != 0) { vm_page_lock(m); vm_page_wire(m); vm_page_unlock(m); } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); if ((allocflags & VM_ALLOC_SBUSY) != 0) vm_page_sbusy(m); return (m); } } m = vm_page_alloc(object, pindex, allocflags); if (m == NULL) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (NULL); VM_OBJECT_WUNLOCK(object); VM_WAIT; VM_OBJECT_WLOCK(object); goto retrylookup; } else if (m->valid != 0) return (m); if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); return (m); } /* * Mapping function for valid or dirty bits in a page. * * Inputs are required to range within a page. */ vm_page_bits_t vm_page_bits(int base, int size) { int first_bit; int last_bit; KASSERT( base + size <= PAGE_SIZE, ("vm_page_bits: illegal base/size %d/%d", base, size) ); if (size == 0) /* handle degenerate case */ return (0); first_bit = base >> DEV_BSHIFT; last_bit = (base + size - 1) >> DEV_BSHIFT; return (((vm_page_bits_t)2 << last_bit) - ((vm_page_bits_t)1 << first_bit)); } /* * vm_page_set_valid_range: * * Sets portions of a page valid. The arguments are expected * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive * of any partial chunks touched by the range. The invalid portion of * such chunks will be zeroed. * * (base + size) must be less than or equal to PAGE_SIZE. */ void vm_page_set_valid_range(vm_page_t m, int base, int size) { int endoff, frag; VM_OBJECT_ASSERT_WLOCKED(m->object); if (size == 0) /* handle degenerate case */ return; /* * If the base is not DEV_BSIZE aligned and the valid * bit is clear, we have to zero out a portion of the * first block. */ if ((frag = base & ~(DEV_BSIZE - 1)) != base && (m->valid & (1 << (base >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, frag, base - frag); /* * If the ending offset is not DEV_BSIZE aligned and the * valid bit is clear, we have to zero out a portion of * the last block. */ endoff = base + size; if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff && (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, endoff, DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); /* * Assert that no previously invalid block that is now being validated * is already dirty. */ KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0, ("vm_page_set_valid_range: page %p is dirty", m)); /* * Set valid bits inclusive of any overlap. */ m->valid |= vm_page_bits(base, size); } /* * Clear the given bits from the specified page's dirty field.
*/ static __inline void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits) { uintptr_t addr; #if PAGE_SIZE < 16384 int shift; #endif /* * If the object is locked and the page is neither exclusive busy nor * write mapped, then the page's dirty field cannot possibly be * set by a concurrent pmap operation. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) m->dirty &= ~pagebits; else { /* * The pmap layer can call vm_page_dirty() without * holding a distinguished lock. The combination of * the object's lock and an atomic operation suffice * to guarantee consistency of the page dirty field. * * For PAGE_SIZE == 32768 case, compiler already * properly aligns the dirty field, so no forcible * alignment is needed. Only require existence of * atomic_clear_64 when page size is 32768. */ addr = (uintptr_t)&m->dirty; #if PAGE_SIZE == 32768 atomic_clear_64((uint64_t *)addr, pagebits); #elif PAGE_SIZE == 16384 atomic_clear_32((uint32_t *)addr, pagebits); #else /* PAGE_SIZE <= 8192 */ /* * Use a trick to perform a 32-bit atomic on the * containing aligned word, to not depend on the existence * of atomic_clear_{8, 16}. */ shift = addr & (sizeof(uint32_t) - 1); #if BYTE_ORDER == BIG_ENDIAN shift = (sizeof(uint32_t) - sizeof(m->dirty) - shift) * NBBY; #else shift *= NBBY; #endif addr &= ~(sizeof(uint32_t) - 1); atomic_clear_32((uint32_t *)addr, pagebits << shift); #endif /* PAGE_SIZE */ } } /* * vm_page_set_validclean: * * Sets portions of a page valid and clean. The arguments are expected * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive * of any partial chunks touched by the range. The invalid portion of * such chunks will be zero'd. * * (base + size) must be less than or equal to PAGE_SIZE. */ void vm_page_set_validclean(vm_page_t m, int base, int size) { vm_page_bits_t oldvalid, pagebits; int endoff, frag; VM_OBJECT_ASSERT_WLOCKED(m->object); if (size == 0) /* handle degenerate case */ return; /* * If the base is not DEV_BSIZE aligned and the valid * bit is clear, we have to zero out a portion of the * first block. */ if ((frag = base & ~(DEV_BSIZE - 1)) != base && (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, frag, base - frag); /* * If the ending offset is not DEV_BSIZE aligned and the * valid bit is clear, we have to zero out a portion of * the last block. */ endoff = base + size; if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff && (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, endoff, DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); /* * Set valid, clear dirty bits. If validating the entire * page we can safely clear the pmap modify bit. We also * use this opportunity to clear the VPO_NOSYNC flag. If a process * takes a write fault on a MAP_NOSYNC memory area the flag will * be set again. * * We set valid bits inclusive of any overlap, but we can only * clear dirty bits for DEV_BSIZE chunks that are fully within * the range. */ oldvalid = m->valid; pagebits = vm_page_bits(base, size); m->valid |= pagebits; #if 0 /* NOT YET */ if ((frag = base & (DEV_BSIZE - 1)) != 0) { frag = DEV_BSIZE - frag; base += frag; size -= frag; if (size < 0) size = 0; } pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1)); #endif if (base == 0 && size == PAGE_SIZE) { /* * The page can only be modified within the pmap if it is * mapped, and it can only be mapped if it was previously * fully valid.
*/ if (oldvalid == VM_PAGE_BITS_ALL) /* * Perform the pmap_clear_modify() first. Otherwise, * a concurrent pmap operation, such as * pmap_protect(), could clear a modification in the * pmap and set the dirty field on the page before * pmap_clear_modify() had begun and after the dirty * field was cleared here. */ pmap_clear_modify(m); m->dirty = 0; m->oflags &= ~VPO_NOSYNC; } else if (oldvalid != VM_PAGE_BITS_ALL) m->dirty &= ~pagebits; else vm_page_clear_dirty_mask(m, pagebits); } void vm_page_clear_dirty(vm_page_t m, int base, int size) { vm_page_clear_dirty_mask(m, vm_page_bits(base, size)); } /* * vm_page_set_invalid: * * Invalidates DEV_BSIZE'd chunks within a page. Both the * valid and dirty bits for the affected areas are cleared. */ void vm_page_set_invalid(vm_page_t m, int base, int size) { vm_page_bits_t bits; vm_object_t object; object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); if (object->type == OBJT_VNODE && base == 0 && IDX_TO_OFF(m->pindex) + size >= object->un_pager.vnp.vnp_size) bits = VM_PAGE_BITS_ALL; else bits = vm_page_bits(base, size); if (object->ref_count != 0 && m->valid == VM_PAGE_BITS_ALL && bits != 0) pmap_remove_all(m); KASSERT((bits == 0 && m->valid == VM_PAGE_BITS_ALL) || !pmap_page_is_mapped(m), ("vm_page_set_invalid: page %p is mapped", m)); m->valid &= ~bits; m->dirty &= ~bits; } /* * vm_page_zero_invalid() * * The kernel assumes that the invalid portions of a page contain * garbage, but such pages can be mapped into memory by user code. * When this occurs, we must zero out the non-valid portions of the * page so user code sees what it expects. * * Pages are most often semi-valid when the end of a file is mapped * into memory and the file's size is not page aligned. */ void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) { int b; int i; VM_OBJECT_ASSERT_WLOCKED(m->object); /* * Scan the valid bits looking for invalid sections that * must be zeroed. Invalid sub-DEV_BSIZE'd areas ( where the * valid bit may be set ) have already been zeroed by * vm_page_set_validclean(). */ for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { if (i == (PAGE_SIZE / DEV_BSIZE) || (m->valid & ((vm_page_bits_t)1 << i))) { if (i > b) { pmap_zero_page_area(m, b << DEV_BSHIFT, (i - b) << DEV_BSHIFT); } b = i + 1; } } /* * setvalid is TRUE when we can safely set the zero'd areas * as being valid. We can do this if there are no cache consistency * issues, e.g., it is ok to do with UFS, but not ok to do with NFS. */ if (setvalid) m->valid = VM_PAGE_BITS_ALL; } /* * vm_page_is_valid: * * Is (partial) page valid? Note that the case where size == 0 * will return FALSE in the degenerate case where the page is * entirely invalid, and TRUE otherwise. */ int vm_page_is_valid(vm_page_t m, int base, int size) { vm_page_bits_t bits; VM_OBJECT_ASSERT_LOCKED(m->object); bits = vm_page_bits(base, size); return (m->valid != 0 && (m->valid & bits) == bits); } /* * vm_page_ps_is_valid: * * Returns TRUE if the entire (super)page is valid and FALSE otherwise. */ boolean_t vm_page_ps_is_valid(vm_page_t m) { int i, npages; VM_OBJECT_ASSERT_LOCKED(m->object); npages = atop(pagesizes[m->psind]); /* * The physically contiguous pages that make up a superpage, i.e., a * page with a page size index ("psind") greater than zero, will * occupy adjacent entries in vm_page_array[]. */ for (i = 0; i < npages; i++) { if (m[i].valid != VM_PAGE_BITS_ALL) return (FALSE); } return (TRUE); } /* * Set the page's dirty bits if the page is modified.
*/ void vm_page_test_dirty(vm_page_t m) { VM_OBJECT_ASSERT_WLOCKED(m->object); if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m)) vm_page_dirty(m); } void vm_page_lock_KBI(vm_page_t m, const char *file, int line) { mtx_lock_flags_(vm_page_lockptr(m), 0, file, line); } void vm_page_unlock_KBI(vm_page_t m, const char *file, int line) { mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line); } int vm_page_trylock_KBI(vm_page_t m, const char *file, int line) { return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line)); } #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line) { vm_page_lock_assert_KBI(m, MA_OWNED, file, line); } void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line) { mtx_assert_(vm_page_lockptr(m), a, file, line); } #endif #ifdef INVARIANTS void vm_page_object_lock_assert(vm_page_t m) { /* * Certain of the page's fields may only be modified by the * holder of the containing object's lock or the exclusive busy * holder. Unfortunately, the holder of the write busy is * not recorded, and thus cannot be checked here. */ if (m->object != NULL && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_WLOCKED(m->object); } void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits) { if ((bits & PGA_WRITEABLE) == 0) return; /* * The PGA_WRITEABLE flag can only be set if the page is * managed, is exclusively busied or the object is locked. * Currently, this flag is only set by pmap_enter(). */ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("PGA_WRITEABLE on unmanaged page")); if (!vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); } #endif #include "opt_ddb.h" #ifdef DDB #include #include DB_SHOW_COMMAND(page, vm_page_print_page_info) { db_printf("vm_cnt.v_free_count: %d\n", vm_cnt.v_free_count); db_printf("vm_cnt.v_cache_count: %d\n", vm_cnt.v_cache_count); db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count); db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count); db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count); db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved); db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min); db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target); db_printf("vm_cnt.v_inactive_target: %d\n", vm_cnt.v_inactive_target); } DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) { int dom; db_printf("pq_free %d pq_cache %d\n", vm_cnt.v_free_count, vm_cnt.v_cache_count); for (dom = 0; dom < vm_ndomains; dom++) { db_printf( "dom %d page_cnt %d free %d pq_act %d pq_inact %d pass %d\n", dom, vm_dom[dom].vmd_page_count, vm_dom[dom].vmd_free_count, vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt, vm_dom[dom].vmd_pass); } } DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) { vm_page_t m; boolean_t phys; if (!have_addr) { db_printf("show pginfo addr\n"); return; } phys = strchr(modif, 'p') != NULL; if (phys) m = PHYS_TO_VM_PAGE(addr); else m = (vm_page_t)addr; db_printf( "page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n" " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags, m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); } #endif /* DDB */ Index: projects/clang370-import/sys/vm/vm_page.h =================================================================== --- projects/clang370-import/sys/vm/vm_page.h (revision 288125) +++
projects/clang370-import/sys/vm/vm_page.h (revision 288126) @@ -1,673 +1,673 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $FreeBSD$ */ /* * Resident memory system definitions. */ #ifndef _VM_PAGE_ #define _VM_PAGE_ #include /* * Management of resident (logical) pages. * * A small structure is kept for each resident * page, indexed by page number. Each structure * is an element of several collections: * * A radix tree used to quickly * perform object/offset lookups * * A list of all pages for a given object, * so they can be quickly deactivated at * time of deallocation. * * An ordered list of pages due for pageout. * * In addition, the structure contains the object * and offset to which this page belongs (for pageout), * and sundry status bits. 
* * In general, operations on this structure's mutable fields are * synchronized using either one of or a combination of the lock on the * object that the page belongs to (O), the pool lock for the page (P), * or the lock for either the free or paging queue (Q). If a field is * annotated below with two of these locks, then holding either lock is * sufficient for read access, but both locks are required for write * access. * * In contrast, the synchronization of accesses to the page's * dirty field is machine dependent (M). In the * machine-independent layer, the lock on the object that the * page belongs to must be held in order to operate on the field. * However, the pmap layer is permitted to set all bits within * the field without holding that lock. If the underlying * architecture does not support atomic read-modify-write * operations on the field's type, then the machine-independent * layer uses a 32-bit atomic on the aligned 32-bit word that * contains the dirty field. In the machine-independent layer, * the implementation of read-modify-write operations on the * field is encapsulated in vm_page_clear_dirty_mask(). */ #if PAGE_SIZE == 4096 #define VM_PAGE_BITS_ALL 0xffu typedef uint8_t vm_page_bits_t; #elif PAGE_SIZE == 8192 #define VM_PAGE_BITS_ALL 0xffffu typedef uint16_t vm_page_bits_t; #elif PAGE_SIZE == 16384 #define VM_PAGE_BITS_ALL 0xffffffffu typedef uint32_t vm_page_bits_t; #elif PAGE_SIZE == 32768 #define VM_PAGE_BITS_ALL 0xfffffffffffffffflu typedef uint64_t vm_page_bits_t; #endif struct vm_page { union { TAILQ_ENTRY(vm_page) q; /* page queue or free list (Q) */ struct { SLIST_ENTRY(vm_page) ss; /* private slists */ void *pv; } s; struct { u_long p; u_long v; } memguard; } plinks; TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ vm_object_t object; /* which object am I in (O,P) */ vm_pindex_t pindex; /* offset into object (O,P) */ vm_paddr_t phys_addr; /* physical address of page */ struct md_page md; /* machine dependent stuff */ u_int wire_count; /* wired down maps refs (P) */ volatile u_int busy_lock; /* busy owners lock */ uint16_t hold_count; /* page hold count (P) */ uint16_t flags; /* page PG_* flags (P) */ uint8_t aflags; /* access is atomic */ uint8_t oflags; /* page VPO_* flags (O) */ uint8_t queue; /* page queue index (P,Q) */ int8_t psind; /* pagesizes[] index (O) */ int8_t segind; uint8_t order; /* index of the buddy queue */ uint8_t pool; u_char act_count; /* page usage count (P) */ /* NOTE that these must support one bit per DEV_BSIZE in a page */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */ vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ }; /* * Page flags stored in oflags: * * Access to these page flags is synchronized by the lock on the object * containing the page (O). * * Note: VPO_UNMANAGED (used by OBJT_DEVICE, OBJT_PHYS and OBJT_SG) * indicates that the page is not under PV management but * otherwise should be treated as a normal page. Pages not * under PV management cannot be paged out via the * object/vm_page_t because there is no knowledge of their pte * mappings, and such pages are also not on any PQ queue. * */ #define VPO_UNUSED01 0x01 /* --available-- */ #define VPO_SWAPSLEEP 0x02 /* waiting for swap to finish */ #define VPO_UNMANAGED 0x04 /* no PV management for page */ #define VPO_SWAPINPROG 0x08 /* swap I/O in progress on page */ #define VPO_NOSYNC 0x10 /* do not collect for syncer */ /* * Busy page implementation details.
* The algorithm is taken mostly from the rwlock(9) and sx(9) lock * implementations, although the support for owner identity is removed * because of size constraints. Checks on lock recursion are therefore not * possible, and the effectiveness of the lock assertions is somewhat * reduced. */ #define VPB_BIT_SHARED 0x01 #define VPB_BIT_EXCLUSIVE 0x02 #define VPB_BIT_WAITERS 0x04 #define VPB_BIT_FLAGMASK \ (VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS) #define VPB_SHARERS_SHIFT 3 #define VPB_SHARERS(x) \ (((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT) #define VPB_SHARERS_WORD(x) ((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED) #define VPB_ONE_SHARER (1 << VPB_SHARERS_SHIFT) #define VPB_SINGLE_EXCLUSIVER VPB_BIT_EXCLUSIVE #define VPB_UNBUSIED VPB_SHARERS_WORD(0) #define PQ_NONE 255 #define PQ_INACTIVE 0 #define PQ_ACTIVE 1 #define PQ_COUNT 2 TAILQ_HEAD(pglist, vm_page); SLIST_HEAD(spglist, vm_page); struct vm_pagequeue { struct mtx pq_mutex; struct pglist pq_pl; int pq_cnt; int * const pq_vcnt; const char * const pq_name; } __aligned(CACHE_LINE_SIZE); struct vm_domain { struct vm_pagequeue vmd_pagequeues[PQ_COUNT]; u_int vmd_page_count; u_int vmd_free_count; long vmd_segs; /* bitmask of the segments */ boolean_t vmd_oom; int vmd_pass; /* local pagedaemon pass */ int vmd_last_active_scan; struct vm_page vmd_marker; /* marker for pagedaemon private use */ }; extern struct vm_domain vm_dom[MAXMEMDOM]; #define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED) #define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex) #define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex) #ifdef _KERNEL static __inline void vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend) { #ifdef notyet vm_pagequeue_assert_locked(pq); #endif pq->pq_cnt += addend; atomic_add_int(pq->pq_vcnt, addend); } #define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1) #define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1) #endif /* _KERNEL */ extern struct mtx_padalign vm_page_queue_free_mtx; extern struct mtx_padalign pa_lock[]; #if defined(__arm__) #define PDRSHIFT PDR_SHIFT #elif !defined(PDRSHIFT) #define PDRSHIFT 21 #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define PA_LOCKPTR(pa) ((struct mtx *)(&pa_lock[pa_index(pa) % PA_LOCK_COUNT])) #define PA_LOCKOBJPTR(pa) ((struct lock_object *)PA_LOCKPTR((pa))) #define PA_LOCK(pa) mtx_lock(PA_LOCKPTR(pa)) #define PA_TRYLOCK(pa) mtx_trylock(PA_LOCKPTR(pa)) #define PA_UNLOCK(pa) mtx_unlock(PA_LOCKPTR(pa)) #define PA_UNLOCK_COND(pa) \ do { \ if ((pa) != 0) { \ PA_UNLOCK((pa)); \ (pa) = 0; \ } \ } while (0) #define PA_LOCK_ASSERT(pa, a) mtx_assert(PA_LOCKPTR(pa), (a)) #ifdef KLD_MODULE #define vm_page_lock(m) vm_page_lock_KBI((m), LOCK_FILE, LOCK_LINE) #define vm_page_unlock(m) vm_page_unlock_KBI((m), LOCK_FILE, LOCK_LINE) #define vm_page_trylock(m) vm_page_trylock_KBI((m), LOCK_FILE, LOCK_LINE) #else /* !KLD_MODULE */ #define vm_page_lockptr(m) (PA_LOCKPTR(VM_PAGE_TO_PHYS((m)))) #define vm_page_lock(m) mtx_lock(vm_page_lockptr((m))) #define vm_page_unlock(m) mtx_unlock(vm_page_lockptr((m))) #define vm_page_trylock(m) mtx_trylock(vm_page_lockptr((m))) #endif #if defined(INVARIANTS) #define vm_page_assert_locked(m) \ vm_page_assert_locked_KBI((m), __FILE__, __LINE__) #define vm_page_lock_assert(m, a) \ vm_page_lock_assert_KBI((m), (a), __FILE__, __LINE__) #else #define vm_page_assert_locked(m) #define vm_page_lock_assert(m, a) #endif /* * The vm_page's aflags are updated using atomic operations.
To set or clear * these flags, the functions vm_page_aflag_set() and vm_page_aflag_clear() * must be used. Neither these flags nor these functions are part of the KBI. * * PGA_REFERENCED may be cleared only if the page is locked. It is set by * both the MI and MD VM layers. However, kernel loadable modules should not * directly set this flag. They should call vm_page_reference() instead. * * PGA_WRITEABLE is set exclusively on managed pages by pmap_enter(). * When it does so, the object must be locked, or the page must be * exclusive busied. The MI VM layer must never access this flag * directly. Instead, it should call pmap_page_is_write_mapped(). * * PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has * at least one executable mapping. It is not consumed by the MI VM layer. */ #define PGA_WRITEABLE 0x01 /* page may be mapped writeable */ #define PGA_REFERENCED 0x02 /* page has been referenced */ #define PGA_EXECUTABLE 0x04 /* page may be mapped executable */ /* * Page flags. If changed at any other time than page allocation or * freeing, the modification must be protected by the vm_page lock. */ #define PG_CACHED 0x0001 /* page is cached */ #define PG_FICTITIOUS 0x0004 /* physical page doesn't exist */ #define PG_ZERO 0x0008 /* page is zeroed */ #define PG_MARKER 0x0010 /* special queue marker page */ #define PG_WINATCFLS 0x0040 /* flush dirty page on inactive q */ #define PG_NODUMP 0x0080 /* don't include this page in a dump */ #define PG_UNHOLDFREE 0x0100 /* delayed free of a held page */ /* * Misc constants. */ #define ACT_DECLINE 1 #define ACT_ADVANCE 3 #define ACT_INIT 5 #define ACT_MAX 64 #ifdef _KERNEL #include #include /* * Each pageable resident page falls into one of four lists: * * free * Available for allocation now. * * cache * Almost available for allocation. Still associated with * an object, but clean and immediately freeable. * * The following lists are LRU sorted: * * inactive * Low activity, candidates for reclamation. * This is the list of pages that should be * paged out next. * * active * Pages that are "active" i.e. they have been * recently referenced. * */ extern int vm_page_zero_count; extern vm_page_t vm_page_array; /* First resident page in table */ extern long vm_page_array_size; /* number of vm_page_t's */ extern long first_page; /* first physical page number */ #define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr) vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); /* * Page allocation parameters for vm_page for the functions * vm_page_alloc(), vm_page_grab(), vm_page_alloc_contig() and * vm_page_alloc_freelist(). Some functions support only a subset * of the flags, and ignore others, see the flags legend. * * Bits 0 - 1 define class. * Bits 2 - 15 dedicated for flags. * Legend: * (a) - vm_page_alloc() supports the flag. * (c) - vm_page_alloc_contig() supports the flag. * (f) - vm_page_alloc_freelist() supports the flag. * (g) - vm_page_grab() supports the flag. * Bits above 15 define the count of additional pages that the caller * intends to allocate. 
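 *
 * Example (hypothetical): request a wired, zero-filled page while
 * hinting that three more allocations will follow:
 *
 *	m = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL |
 *	    VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_COUNT(3));
 *
 * The count is only a hint: on failure it is folded into the pageout
 * deficit; it does not reserve pages.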
*/ #define VM_ALLOC_NORMAL 0 #define VM_ALLOC_INTERRUPT 1 #define VM_ALLOC_SYSTEM 2 #define VM_ALLOC_CLASS_MASK 3 #define VM_ALLOC_WIRED 0x0020 /* (acfg) Allocate non pageable page */ #define VM_ALLOC_ZERO 0x0040 /* (acfg) Try to obtain a zeroed page */ #define VM_ALLOC_NOOBJ 0x0100 /* (acg) No associated object */ #define VM_ALLOC_NOBUSY 0x0200 /* (acg) Do not busy the page */ #define VM_ALLOC_IFCACHED 0x0400 /* (ag) Fail if page is not cached */ #define VM_ALLOC_IFNOTCACHED 0x0800 /* (ag) Fail if page is cached */ #define VM_ALLOC_IGN_SBUSY 0x1000 /* (g) Ignore shared busy flag */ #define VM_ALLOC_NODUMP 0x2000 /* (ag) don't include in dump */ #define VM_ALLOC_SBUSY 0x4000 /* (acg) Shared busy the page */ #define VM_ALLOC_NOWAIT 0x8000 /* (g) Do not sleep, return NULL */ #define VM_ALLOC_COUNT_SHIFT 16 #define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT) #ifdef M_NOWAIT static inline int malloc2vm_flags(int malloc_flags) { int pflags; KASSERT((malloc_flags & M_USE_RESERVE) == 0 || (malloc_flags & M_NOWAIT) != 0, ("M_USE_RESERVE requires M_NOWAIT")); pflags = (malloc_flags & M_USE_RESERVE) != 0 ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM; if ((malloc_flags & M_ZERO) != 0) pflags |= VM_ALLOC_ZERO; if ((malloc_flags & M_NODUMP) != 0) pflags |= VM_ALLOC_NODUMP; return (pflags); } #endif void vm_page_busy_downgrade(vm_page_t m); void vm_page_busy_sleep(vm_page_t m, const char *msg); void vm_page_flash(vm_page_t m); void vm_page_hold(vm_page_t mem); void vm_page_unhold(vm_page_t mem); void vm_page_free(vm_page_t m); void vm_page_free_zero(vm_page_t m); void vm_page_activate (vm_page_t); void vm_page_advise(vm_page_t m, int advice); vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int); vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); vm_page_t vm_page_alloc_freelist(int, int); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); void vm_page_cache(vm_page_t); void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t); void vm_page_cache_transfer(vm_object_t, vm_pindex_t, vm_object_t); int vm_page_try_to_cache (vm_page_t); int vm_page_try_to_free (vm_page_t); void vm_page_deactivate (vm_page_t); void vm_page_dequeue(vm_page_t m); void vm_page_dequeue_locked(vm_page_t m); vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t); vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); boolean_t vm_page_is_cached(vm_object_t object, vm_pindex_t pindex); vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t); vm_page_t vm_page_next(vm_page_t m); int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); struct vm_pagequeue *vm_page_pagequeue(vm_page_t m); vm_page_t vm_page_prev(vm_page_t m); boolean_t vm_page_ps_is_valid(vm_page_t m); void vm_page_putfake(vm_page_t m); void vm_page_readahead_finish(vm_page_t m); void vm_page_reference(vm_page_t m); void vm_page_remove (vm_page_t); int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex); void vm_page_requeue(vm_page_t m); void vm_page_requeue_locked(vm_page_t m); int vm_page_sbusied(vm_page_t m); void vm_page_set_valid_range(vm_page_t m, int base, int size); int vm_page_sleep_if_busy(vm_page_t m, const char *msg); vm_offset_t vm_page_startup(vm_offset_t vaddr); void 
vm_page_sunbusy(vm_page_t m); int vm_page_trysbusy(vm_page_t m); void vm_page_unhold_pages(vm_page_t *ma, int count); -void vm_page_unwire (vm_page_t m, uint8_t queue); +boolean_t vm_page_unwire(vm_page_t m, uint8_t queue); void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_wire (vm_page_t); void vm_page_xunbusy_hard(vm_page_t m); void vm_page_set_validclean (vm_page_t, int, int); void vm_page_clear_dirty (vm_page_t, int, int); void vm_page_set_invalid (vm_page_t, int, int); int vm_page_is_valid (vm_page_t, int, int); void vm_page_test_dirty (vm_page_t); vm_page_bits_t vm_page_bits(int base, int size); void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid); void vm_page_free_toq(vm_page_t m); void vm_page_zero_idle_wakeup(void); void vm_page_dirty_KBI(vm_page_t m); void vm_page_lock_KBI(vm_page_t m, const char *file, int line); void vm_page_unlock_KBI(vm_page_t m, const char *file, int line); int vm_page_trylock_KBI(vm_page_t m, const char *file, int line); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line); void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line); #endif #define vm_page_assert_sbusied(m) \ KASSERT(vm_page_sbusied(m), \ ("vm_page_assert_sbusied: page %p not shared busy @ %s:%d", \ (void *)m, __FILE__, __LINE__)); #define vm_page_assert_unbusied(m) \ KASSERT(!vm_page_busied(m), \ ("vm_page_assert_unbusied: page %p busy @ %s:%d", \ (void *)m, __FILE__, __LINE__)); #define vm_page_assert_xbusied(m) \ KASSERT(vm_page_xbusied(m), \ ("vm_page_assert_xbusied: page %p not exclusive busy @ %s:%d", \ (void *)m, __FILE__, __LINE__)); #define vm_page_busied(m) \ ((m)->busy_lock != VPB_UNBUSIED) #define vm_page_sbusy(m) do { \ if (!vm_page_trysbusy(m)) \ panic("%s: page %p failed shared busing", __func__, m); \ } while (0) #define vm_page_tryxbusy(m) \ (atomic_cmpset_acq_int(&m->busy_lock, VPB_UNBUSIED, \ VPB_SINGLE_EXCLUSIVER)) #define vm_page_xbusied(m) \ ((m->busy_lock & VPB_SINGLE_EXCLUSIVER) != 0) #define vm_page_xbusy(m) do { \ if (!vm_page_tryxbusy(m)) \ panic("%s: page %p failed exclusive busing", __func__, \ m); \ } while (0) #define vm_page_xunbusy(m) do { \ if (!atomic_cmpset_rel_int(&(m)->busy_lock, \ VPB_SINGLE_EXCLUSIVER, VPB_UNBUSIED)) \ vm_page_xunbusy_hard(m); \ } while (0) #ifdef INVARIANTS void vm_page_object_lock_assert(vm_page_t m); #define VM_PAGE_OBJECT_LOCK_ASSERT(m) vm_page_object_lock_assert(m) void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits); #define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) \ vm_page_assert_pga_writeable(m, bits) #else #define VM_PAGE_OBJECT_LOCK_ASSERT(m) (void)0 #define VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits) (void)0 #endif /* * We want to use atomic updates for the aflags field, which is 8 bits wide. * However, not all architectures support atomic operations on 8-bit * destinations. In order that we can easily use a 32-bit operation, we * require that the aflags field be 32-bit aligned. */ CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0); /* * Clear the given bits in the specified page. */ static inline void vm_page_aflag_clear(vm_page_t m, uint8_t bits) { uint32_t *addr, val; /* * The PGA_REFERENCED flag can only be cleared if the page is locked. */ if ((bits & PGA_REFERENCED) != 0) vm_page_assert_locked(m); /* * Access the whole 32-bit word containing the aflags field with an * atomic update. 
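Stepping back to the busy-state macros defined above, a sketch of the usual exclusive-busy protocol follows. It assumes the object lock is held across the lookup and that no unlocked shared-busy acquirers race for this object; the wmesg string is illustrative:

	vm_page_t m;

retry:
	m = vm_page_lookup(object, pindex);
	if (m != NULL) {
		if (vm_page_sleep_if_busy(m, "pgxbusy"))
			goto retry;	/* the object lock was dropped */
		vm_page_xbusy(m);	/* page is unbusied here; this succeeds */
		/* ... page contents are stable against other busying ... */
		vm_page_xunbusy(m);
	}
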
Parallel non-atomic updates to the other fields * within this word are handled properly by the atomic update. */ addr = (void *)&m->aflags; KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0, ("vm_page_aflag_clear: aflags is misaligned")); val = bits; #if BYTE_ORDER == BIG_ENDIAN val <<= 24; #endif atomic_clear_32(addr, val); } /* * Set the given bits in the specified page. */ static inline void vm_page_aflag_set(vm_page_t m, uint8_t bits) { uint32_t *addr, val; VM_PAGE_ASSERT_PGA_WRITEABLE(m, bits); /* * Access the whole 32-bit word containing the aflags field with an * atomic update. Parallel non-atomic updates to the other fields * within this word are handled properly by the atomic update. */ addr = (void *)&m->aflags; KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0, ("vm_page_aflag_set: aflags is misaligned")); val = bits; #if BYTE_ORDER == BIG_ENDIAN val <<= 24; #endif atomic_set_32(addr, val); } /* * vm_page_dirty: * * Set all bits in the page's dirty field. * * The object containing the specified page must be locked if the * call is made from the machine-independent layer. * * See vm_page_clear_dirty_mask(). */ static __inline void vm_page_dirty(vm_page_t m) { /* Use vm_page_dirty_KBI() under INVARIANTS to save memory. */ #if defined(KLD_MODULE) || defined(INVARIANTS) vm_page_dirty_KBI(m); #else m->dirty = VM_PAGE_BITS_ALL; #endif } /* * vm_page_remque: * * If the given page is in a page queue, then remove it from that page * queue. * * The page must be locked. */ static inline void vm_page_remque(vm_page_t m) { if (m->queue != PQ_NONE) vm_page_dequeue(m); } /* * vm_page_undirty: * * Set page to not be dirty. Note: does not clear pmap modify bits */ static __inline void vm_page_undirty(vm_page_t m) { VM_PAGE_OBJECT_LOCK_ASSERT(m); m->dirty = 0; } #endif /* _KERNEL */ #endif /* !_VM_PAGE_ */ Index: projects/clang370-import/sys/x86/iommu/intel_ctx.c =================================================================== --- projects/clang370-import/sys/x86/iommu/intel_ctx.c (revision 288125) +++ projects/clang370-import/sys/x86/iommu/intel_ctx.c (revision 288126) @@ -1,777 +1,776 @@ /*- * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context"); static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain"); static void dmar_domain_unload_task(void *arg, int pending); static void dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain); static void dmar_domain_destroy(struct dmar_domain *domain); -static void dmar_ctx_dtr(struct dmar_ctx *ctx); static void dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus) { struct sf_buf *sf; dmar_root_entry_t *re; vm_page_t ctxm; /* * Allocated context page must be linked. */ ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_NOALLOC); if (ctxm != NULL) return; /* * Page not present, allocate and link. Note that other * thread might execute this sequence in parallel. This * should be safe, because the context entries written by both * threads are equal. */ TD_PREP_PINNED_ASSERT; ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_ZERO | DMAR_PGF_WAITOK); re = dmar_map_pgtbl(dmar->ctx_obj, 0, DMAR_PGF_NOALLOC, &sf); re += bus; dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK & VM_PAGE_TO_PHYS(ctxm))); dmar_flush_root_to_ram(dmar, re); dmar_unmap_pgtbl(sf); TD_PINNED_ASSERT; } static dmar_ctx_entry_t * dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp) { dmar_ctx_entry_t *ctxp; ctxp = dmar_map_pgtbl(ctx->domain->dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->rid), DMAR_PGF_NOALLOC | DMAR_PGF_WAITOK, sfp); ctxp += ctx->rid & 0xff; return (ctxp); } static void ctx_tag_init(struct dmar_ctx *ctx, device_t dev) { bus_addr_t maxaddr; maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR); ctx->ctx_tag.common.ref_count = 1; /* Prevent free */ ctx->ctx_tag.common.impl = &bus_dma_dmar_impl; ctx->ctx_tag.common.boundary = PCI_DMA_BOUNDARY; ctx->ctx_tag.common.lowaddr = maxaddr; ctx->ctx_tag.common.highaddr = maxaddr; ctx->ctx_tag.common.maxsize = maxaddr; ctx->ctx_tag.common.nsegments = BUS_SPACE_UNRESTRICTED; ctx->ctx_tag.common.maxsegsz = maxaddr; ctx->ctx_tag.ctx = ctx; ctx->ctx_tag.owner = dev; } static void ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move) { struct dmar_unit *unit; struct dmar_domain *domain; vm_page_t ctx_root; domain = ctx->domain; unit = domain->dmar; KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0), ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx", unit->unit, pci_get_bus(ctx->ctx_tag.owner), pci_get_slot(ctx->ctx_tag.owner), pci_get_function(ctx->ctx_tag.owner), ctxp->ctx1, ctxp->ctx2)); /* * For update due to move, the store is not atomic. It is * possible that DMAR read upper doubleword, while low * doubleword is not yet updated. 
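For orientation, the table layout assumed by dmar_ensure_ctx_page() and dmar_map_ctx_entry() above can be sketched with two illustrative helpers (the names are not part of the source): page 0 of dmar->ctx_obj holds the 256-entry root table indexed by PCI bus, each bus's context table lives at page 1 + bus, and the entry within that page is selected by the low eight bits of the requester ID:

	static inline vm_pindex_t
	example_ctx_page_index(int bus)
	{
		return (1 + bus);	/* the root table occupies page 0 */
	}

	static inline int
	example_ctx_entry_index(uint16_t rid)
	{
		return (rid & 0xff);	/* slot/function within the bus */
	}
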
The domain id is stored in * the upper doubleword, while the table pointer in the lower. * * There is no good solution, for the same reason it is wrong * to clear P bit in the ctx entry for update. */ dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) | domain->awlvl); if ((domain->flags & DMAR_DOMAIN_IDMAP) != 0 && (unit->hw_ecap & DMAR_ECAP_PT) != 0) { KASSERT(domain->pgtbl_obj == NULL, ("ctx %p non-null pgtbl_obj", ctx)); dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P); } else { ctx_root = dmar_pgalloc(domain->pgtbl_obj, 0, DMAR_PGF_NOALLOC); dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR | (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) | DMAR_CTX1_P); } dmar_flush_ctx_to_ram(unit, ctxp); } static int dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force) { int error; /* * If dmar declares Caching Mode as Set, follow 11.5 "Caching * Mode Consideration" and do the (global) invalidation of the * negative TLB entries. */ if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force) return (0); if (dmar->qi_enabled) { dmar_qi_invalidate_ctx_glob_locked(dmar); if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force) dmar_qi_invalidate_iotlb_glob_locked(dmar); return (0); } error = dmar_inv_ctx_glob(dmar); if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)) error = dmar_inv_iotlb_glob(dmar); return (error); } static int domain_init_rmrr(struct dmar_domain *domain, device_t dev) { struct dmar_map_entries_tailq rmrr_entries; struct dmar_map_entry *entry, *entry1; vm_page_t *ma; dmar_gaddr_t start, end; vm_pindex_t size, i; int error, error1; error = 0; TAILQ_INIT(&rmrr_entries); dmar_dev_parse_rmrr(domain, dev, &rmrr_entries); TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) { /* * VT-d specification requires that the start of an * RMRR entry is 4k-aligned. Buggy BIOSes put * anything into the start and end fields. Truncate * and round as necessary. * * We also allow the overlapping RMRR entries, see * dmar_gas_alloc_region(). */ start = entry->start; end = entry->end; entry->start = trunc_page(start); entry->end = round_page(end); if (entry->start == entry->end) { /* Workaround for some AMI (?) BIOSes */ if (bootverbose) { device_printf(dev, "BIOS bug: dmar%d RMRR " "region (%jx, %jx) corrected\n", domain->dmar->unit, start, end); } entry->end += DMAR_PAGE_SIZE * 0x20; } size = OFF_TO_IDX(entry->end - entry->start); ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK); for (i = 0; i < size; i++) { ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i, VM_MEMATTR_DEFAULT); } error1 = dmar_gas_map_region(domain, entry, DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE, DMAR_GM_CANWAIT, ma); /* * Non-failed RMRR entries are owned by context rb * tree. Get rid of the failed entry, but do not stop * the loop. Rest of the parsed RMRR entries are * loaded and removed on the context destruction.
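Two details in domain_init_rmrr() are worth a standalone sketch: trunc_page()/round_page() widen an unaligned BIOS-provided range outward (for example, with 4K pages a range [0x1234, 0x5678) becomes [0x1000, 0x6000)), and the mapping itself goes through an array of fake pages describing the physical range. A minimal version of that idiom, assuming a page-aligned [start, end):

	vm_page_t *ma;
	vm_pindex_t i, size;

	size = OFF_TO_IDX(end - start);
	ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
	for (i = 0; i < size; i++)
		ma[i] = vm_page_getfake(start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	/* ... pass ma[] to the mapping routine, then release it ... */
	for (i = 0; i < size; i++)
		vm_page_putfake(ma[i]);
	free(ma, M_TEMP);
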
*/ if (error1 == 0 && entry->end != entry->start) { DMAR_LOCK(domain->dmar); domain->refs++; /* XXXKIB prevent free */ domain->flags |= DMAR_DOMAIN_RMRR; DMAR_UNLOCK(domain->dmar); } else { if (error1 != 0) { device_printf(dev, "dmar%d failed to map RMRR region (%jx, %jx) %d\n", domain->dmar->unit, start, end, error1); error = error1; } TAILQ_REMOVE(&rmrr_entries, entry, unroll_link); dmar_gas_free_entry(domain, entry); } for (i = 0; i < size; i++) vm_page_putfake(ma[i]); free(ma, M_TEMP); } return (error); } static struct dmar_domain * dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped) { struct dmar_domain *domain; int error, id, mgaw; id = alloc_unr(dmar->domids); if (id == -1) return (NULL); domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO); domain->domain = id; LIST_INIT(&domain->contexts); RB_INIT(&domain->rb_root); TAILQ_INIT(&domain->unload_entries); TASK_INIT(&domain->unload_task, 0, dmar_domain_unload_task, domain); mtx_init(&domain->lock, "dmardom", NULL, MTX_DEF); domain->dmar = dmar; /* * For now, use the maximal usable physical address of the * installed memory to calculate the mgaw on id_mapped domain. * It is useful for the identity mapping, and less so for the * virtualized bus address space. */ domain->end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR; mgaw = dmar_maxaddr2mgaw(dmar, domain->end, !id_mapped); error = domain_set_agaw(domain, mgaw); if (error != 0) goto fail; if (!id_mapped) /* Use all supported address space for remapping. */ domain->end = 1ULL << (domain->agaw - 1); dmar_gas_init_domain(domain); if (id_mapped) { if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) { domain->pgtbl_obj = domain_get_idmap_pgtbl(domain, domain->end); } domain->flags |= DMAR_DOMAIN_IDMAP; } else { error = domain_alloc_pgtbl(domain); if (error != 0) goto fail; /* Disable local apic region access */ error = dmar_gas_reserve_region(domain, 0xfee00000, 0xfeefffff + 1); if (error != 0) goto fail; } return (domain); fail: dmar_domain_destroy(domain); return (NULL); } static struct dmar_ctx * dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid) { struct dmar_ctx *ctx; ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO); ctx->domain = domain; ctx->rid = rid; ctx->refs = 1; return (ctx); } static void dmar_ctx_link(struct dmar_ctx *ctx) { struct dmar_domain *domain; domain = ctx->domain; DMAR_ASSERT_LOCKED(domain->dmar); KASSERT(domain->refs >= domain->ctx_cnt, ("dom %p ref underflow %d %d", domain, domain->refs, domain->ctx_cnt)); domain->refs++; domain->ctx_cnt++; LIST_INSERT_HEAD(&domain->contexts, ctx, link); } static void dmar_ctx_unlink(struct dmar_ctx *ctx) { struct dmar_domain *domain; domain = ctx->domain; DMAR_ASSERT_LOCKED(domain->dmar); KASSERT(domain->refs > 0, ("domain %p ctx dtr refs %d", domain, domain->refs)); KASSERT(domain->ctx_cnt >= domain->refs, ("domain %p ctx dtr refs %d ctx_cnt %d", domain, domain->refs, domain->ctx_cnt)); domain->refs--; domain->ctx_cnt--; LIST_REMOVE(ctx, link); } static void dmar_domain_destroy(struct dmar_domain *domain) { KASSERT(TAILQ_EMPTY(&domain->unload_entries), ("unfinished unloads %p", domain)); KASSERT(LIST_EMPTY(&domain->contexts), ("destroying dom %p with contexts", domain)); KASSERT(domain->ctx_cnt == 0, ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt)); KASSERT(domain->refs == 0, ("destroying dom %p with refs %d", domain, domain->refs)); if ((domain->flags & DMAR_DOMAIN_GAS_INITED) != 0) { DMAR_DOMAIN_LOCK(domain); dmar_gas_fini_domain(domain); DMAR_DOMAIN_UNLOCK(domain); } if ((domain->flags 
& DMAR_DOMAIN_PGTBL_INITED) != 0) { if (domain->pgtbl_obj != NULL) DMAR_DOMAIN_PGLOCK(domain); domain_free_pgtbl(domain); } mtx_destroy(&domain->lock); free_unr(domain->dmar->domids, domain->domain); free(domain, M_DMAR_DOMAIN); } struct dmar_ctx * dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init) { struct dmar_domain *domain, *domain1; struct dmar_ctx *ctx, *ctx1; dmar_ctx_entry_t *ctxp; struct sf_buf *sf; int bus, slot, func, error; bool enable; bus = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); enable = false; TD_PREP_PINNED_ASSERT; DMAR_LOCK(dmar); ctx = dmar_find_ctx_locked(dmar, rid); error = 0; if (ctx == NULL) { /* * Perform the allocations which require sleep or have * higher chance to succeed if the sleep is allowed. */ DMAR_UNLOCK(dmar); dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid)); domain1 = dmar_domain_alloc(dmar, id_mapped); if (domain1 == NULL) { TD_PINNED_ASSERT; return (NULL); } error = domain_init_rmrr(domain1, dev); if (error != 0) { dmar_domain_destroy(domain1); TD_PINNED_ASSERT; return (NULL); } ctx1 = dmar_ctx_alloc(domain1, rid); ctxp = dmar_map_ctx_entry(ctx1, &sf); DMAR_LOCK(dmar); /* * Recheck the contexts, other thread might have * already allocated needed one. */ ctx = dmar_find_ctx_locked(dmar, rid); if (ctx == NULL) { domain = domain1; ctx = ctx1; dmar_ctx_link(ctx); ctx->ctx_tag.owner = dev; ctx_tag_init(ctx, dev); /* * This is the first activated context for the * DMAR unit. Enable the translation after * everything is set up. */ if (LIST_EMPTY(&dmar->domains)) enable = true; LIST_INSERT_HEAD(&dmar->domains, domain, link); ctx_id_entry_init(ctx, ctxp, false); device_printf(dev, "dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d " "agaw %d %s-mapped\n", dmar->unit, dmar->segment, bus, slot, func, rid, domain->domain, domain->mgaw, domain->agaw, id_mapped ? "id" : "re"); } else { /* Nothing needs to be done to destroy ctx1. */ dmar_domain_destroy(domain1); domain = ctx->domain; ctx->refs++; /* tag referenced us */ } dmar_unmap_pgtbl(sf); } else { domain = ctx->domain; ctx->refs++; /* tag referenced us */ } error = dmar_flush_for_ctx_entry(dmar, enable); if (error != 0) { dmar_free_ctx_locked(dmar, ctx); TD_PINNED_ASSERT; return (NULL); } /* * The dmar lock was potentially dropped between check for the * empty context list and now. Recheck the state of GCMD_TE * to avoid unneeded command. */ if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) { error = dmar_enable_translation(dmar); if (error != 0) { dmar_free_ctx_locked(dmar, ctx); TD_PINNED_ASSERT; return (NULL); } } DMAR_UNLOCK(dmar); TD_PINNED_ASSERT; return (ctx); } int dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx) { struct dmar_unit *dmar; struct dmar_domain *old_domain; dmar_ctx_entry_t *ctxp; struct sf_buf *sf; int error; dmar = domain->dmar; old_domain = ctx->domain; if (domain == old_domain) return (0); KASSERT(old_domain->dmar == dmar, ("domain %p %u moving between dmars %u %u", domain, domain->domain, old_domain->dmar->unit, domain->dmar->unit)); TD_PREP_PINNED_ASSERT; ctxp = dmar_map_ctx_entry(ctx, &sf); DMAR_LOCK(dmar); dmar_ctx_unlink(ctx); ctx->domain = domain; dmar_ctx_link(ctx); ctx_id_entry_init(ctx, ctxp, true); dmar_unmap_pgtbl(sf); error = dmar_flush_for_ctx_entry(dmar, true); /* If flush failed, rolling back would not work as well. 
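dmar_get_ctx_for_dev() above uses the classic drop-the-lock allocation pattern: sleepable allocations are performed with the unit lock released, the lookup is repeated after relocking, and the speculative allocation is discarded if another thread won the race. In schematic form (the helper names here are hypothetical):

	DMAR_UNLOCK(dmar);
	new = alloc_may_sleep();	/* hypothetical sleepable allocation */
	DMAR_LOCK(dmar);
	ctx = dmar_find_ctx_locked(dmar, rid);
	if (ctx == NULL) {
		ctx = new;			/* won the race: install ours */
		install_locked(dmar, ctx);	/* hypothetical */
	} else
		discard(new);		/* lost: reuse the existing ctx */
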
*/ printf("dmar%d rid %x domain %d->%d %s-mapped\n", dmar->unit, ctx->rid, old_domain->domain, domain->domain, (domain->flags & DMAR_DOMAIN_IDMAP) != 0 ? "id" : "re"); dmar_unref_domain_locked(dmar, old_domain); TD_PINNED_ASSERT; return (error); } static void dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain) { DMAR_ASSERT_LOCKED(dmar); KASSERT(domain->refs >= 1, ("dmar %d domain %p refs %u", dmar->unit, domain, domain->refs)); KASSERT(domain->refs > domain->ctx_cnt, ("dmar %d domain %p refs %d ctx_cnt %d", dmar->unit, domain, domain->refs, domain->ctx_cnt)); if (domain->refs > 1) { domain->refs--; DMAR_UNLOCK(dmar); return; } KASSERT((domain->flags & DMAR_DOMAIN_RMRR) == 0, ("lost ref on RMRR domain %p", domain)); LIST_REMOVE(domain, link); DMAR_UNLOCK(dmar); taskqueue_drain(dmar->delayed_taskqueue, &domain->unload_task); dmar_domain_destroy(domain); } void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx) { struct sf_buf *sf; dmar_ctx_entry_t *ctxp; struct dmar_domain *domain; DMAR_ASSERT_LOCKED(dmar); KASSERT(ctx->refs >= 1, ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs)); /* * If our reference is not last, only the dereference should * be performed. */ if (ctx->refs > 1) { ctx->refs--; DMAR_UNLOCK(dmar); return; } KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0, ("lost ref on disabled ctx %p", ctx)); /* * Otherwise, the context entry must be cleared before the * page table is destroyed. The mapping of the context * entries page could require sleep, unlock the dmar. */ DMAR_UNLOCK(dmar); TD_PREP_PINNED_ASSERT; ctxp = dmar_map_ctx_entry(ctx, &sf); DMAR_LOCK(dmar); KASSERT(ctx->refs >= 1, ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs)); /* * Other thread might have referenced the context, in which * case again only the dereference should be performed. */ if (ctx->refs > 1) { ctx->refs--; DMAR_UNLOCK(dmar); dmar_unmap_pgtbl(sf); TD_PINNED_ASSERT; return; } KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0, ("lost ref on disabled ctx %p", ctx)); /* * Clear the context pointer and flush the caches. * XXXKIB: cannot do this if any RMRR entries are still present. */ dmar_pte_clear(&ctxp->ctx1); ctxp->ctx2 = 0; dmar_flush_ctx_to_ram(dmar, ctxp); dmar_inv_ctx_glob(dmar); if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) { if (dmar->qi_enabled) dmar_qi_invalidate_iotlb_glob_locked(dmar); else dmar_inv_iotlb_glob(dmar); } dmar_unmap_pgtbl(sf); domain = ctx->domain; dmar_ctx_unlink(ctx); free(ctx, M_DMAR_CTX); dmar_unref_domain_locked(dmar, domain); TD_PINNED_ASSERT; } void dmar_free_ctx(struct dmar_ctx *ctx) { struct dmar_unit *dmar; dmar = ctx->domain->dmar; DMAR_LOCK(dmar); dmar_free_ctx_locked(dmar, ctx); } /* * Returns with the domain locked. 
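The ordering in dmar_free_ctx_locked() above is deliberate: the hardware context entry must be cleared and the caches invalidated before the domain, and with it the page tables, can go away. Schematically:

	/*
	 *   dmar_pte_clear(&ctxp->ctx1);          1. clear the present bit
	 *   dmar_flush_ctx_to_ram(dmar, ctxp);    2. push the clear to memory
	 *   dmar_inv_ctx_glob(dmar);              3. invalidate the ctx cache
	 *   (IOTLB invalidation if DMAR_ECAP_DI)  4. flush stale translations
	 *   dmar_unref_domain_locked(dmar, dom);  5. may destroy page tables
	 */
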
*/ struct dmar_ctx * dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid) { struct dmar_domain *domain; struct dmar_ctx *ctx; DMAR_ASSERT_LOCKED(dmar); LIST_FOREACH(domain, &dmar->domains, link) { LIST_FOREACH(ctx, &domain->contexts, link) { if (ctx->rid == rid) return (ctx); } } return (NULL); } void dmar_domain_free_entry(struct dmar_map_entry *entry, bool free) { struct dmar_domain *domain; domain = entry->domain; DMAR_DOMAIN_LOCK(domain); if ((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0) dmar_gas_free_region(domain, entry); else dmar_gas_free_space(domain, entry); DMAR_DOMAIN_UNLOCK(domain); if (free) dmar_gas_free_entry(domain, entry); else entry->flags = 0; } void dmar_domain_unload_entry(struct dmar_map_entry *entry, bool free) { struct dmar_unit *unit; unit = entry->domain->dmar; if (unit->qi_enabled) { DMAR_LOCK(unit); dmar_qi_invalidate_locked(entry->domain, entry->start, entry->end - entry->start, &entry->gseq); if (!free) entry->flags |= DMAR_MAP_ENTRY_QI_NF; TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link); DMAR_UNLOCK(unit); } else { domain_flush_iotlb_sync(entry->domain, entry->start, entry->end - entry->start); dmar_domain_free_entry(entry, free); } } void dmar_domain_unload(struct dmar_domain *domain, struct dmar_map_entries_tailq *entries, bool cansleep) { struct dmar_unit *unit; struct dmar_map_entry *entry, *entry1; struct dmar_qi_genseq gseq; int error; unit = domain->dmar; TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) { KASSERT((entry->flags & DMAR_MAP_ENTRY_MAP) != 0, ("not mapped entry %p %p", domain, entry)); error = domain_unmap_buf(domain, entry->start, entry->end - entry->start, cansleep ? DMAR_PGF_WAITOK : 0); KASSERT(error == 0, ("unmap %p error %d", domain, error)); if (!unit->qi_enabled) { domain_flush_iotlb_sync(domain, entry->start, entry->end - entry->start); TAILQ_REMOVE(entries, entry, dmamap_link); dmar_domain_free_entry(entry, true); } } if (TAILQ_EMPTY(entries)) return; KASSERT(unit->qi_enabled, ("loaded entry left")); DMAR_LOCK(unit); TAILQ_FOREACH(entry, entries, dmamap_link) { entry->gseq.gen = 0; entry->gseq.seq = 0; dmar_qi_invalidate_locked(domain, entry->start, entry->end - entry->start, TAILQ_NEXT(entry, dmamap_link) == NULL ? 
&gseq : NULL); } TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) { entry->gseq = gseq; TAILQ_REMOVE(entries, entry, dmamap_link); TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link); } DMAR_UNLOCK(unit); } static void dmar_domain_unload_task(void *arg, int pending) { struct dmar_domain *domain; struct dmar_map_entries_tailq entries; domain = arg; TAILQ_INIT(&entries); for (;;) { DMAR_DOMAIN_LOCK(domain); TAILQ_SWAP(&domain->unload_entries, &entries, dmar_map_entry, dmamap_link); DMAR_DOMAIN_UNLOCK(domain); if (TAILQ_EMPTY(&entries)) break; dmar_domain_unload(domain, &entries, true); } } Index: projects/clang370-import/sys =================================================================== --- projects/clang370-import/sys (revision 288125) +++ projects/clang370-import/sys (revision 288126) Property changes on: projects/clang370-import/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r288100-288125 Index: projects/clang370-import/usr.sbin/acpi/acpiconf/acpiconf.8 =================================================================== --- projects/clang370-import/usr.sbin/acpi/acpiconf/acpiconf.8 (revision 288125) +++ projects/clang370-import/usr.sbin/acpi/acpiconf/acpiconf.8 (revision 288126) @@ -1,96 +1,94 @@ .\"- .\" Copyright (c) 2000 Dag-Erling Coïdan Smørgrav .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer .\" in this position and unchanged. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. The name of the author may not be used to endorse or promote products .\" derived from this software without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd June 10, 2014 +.Dd September 22, 2015 .Dt ACPICONF 8 .Os .Sh NAME .Nm acpiconf .Nd control ACPI power management .Sh SYNOPSIS .Nm .Op Fl h .Op Fl i Ar batt .Op Fl k Ar ack .Op Fl s Ar type .Sh DESCRIPTION The .Nm utility allows the user control of the ACPI power management functions. The following command-line options are recognized: .Bl -tag -width ".Fl s Ar type" .It Fl h Displays a summary of available options. .It Fl i Ar batt Get design information about the specified battery. .It Fl k Ar ack Ack or abort a pending suspend request using the argument provided. .Sy Most users should not use this option directly. .It Fl s Ar type Enters the specified sleep mode. 
Recognized types are .Cm 1 (only the CPU clock is stopped), .Cm 2 (not implemented on most systems but similar to S1), .Cm 3 (the CPU context is lost and memory context is preserved), -.Cm 4 -(the CPU context is lost and memory context is stored to disk) and -.Cm 5 -(soft off). +.Cm 4 +(the CPU context is lost and memory context is stored to disk). Sleep states may also be given as S1, S2, etc. The supported states depend on BIOS implementation, including ACPI byte code (AML). If the .Pa /etc/rc.suspend and .Pa /etc/rc.resume scripts are executable, they will be run before and after entering the given sleep state. .El .Sh SEE ALSO .Xr acpi 4 , .Xr acpidump 8 , .Xr apm 8 .Sh HISTORY The .Nm utility appeared in .Fx 5.0 . .Sh AUTHORS .An -nosplit The .Nm utility was written by .An Mitsuru Iwasaki Aq Mt iwasaki@FreeBSD.org . This manual page was written by .An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org . Index: projects/clang370-import/usr.sbin/ndiscvt/inf.c =================================================================== --- projects/clang370-import/usr.sbin/ndiscvt/inf.c (revision 288125) +++ projects/clang370-import/usr.sbin/ndiscvt/inf.c (revision 288126) @@ -1,910 +1,916 @@ /* * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "inf.h" extern FILE *yyin; int yyparse (void); const char *words[W_MAX]; /* More than we'll need. 
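The words[]/idx pair declared above is the parser's per-line word stack. Judging from regkey_add() and assign_add() further below, the grammar pushes a line's values rightmost-first, so the (idx - 1) - i reversal in assign_add() restores source order; a small inferred illustration:

	/*
	 * For an INF line "key = v0, v1, v2" (inferred push order):
	 *
	 *   words[0] = "v2", words[1] = "v1", words[2] = "v0"
	 *
	 * after which assign_add() stores vals[0] = "v0", vals[1] = "v1",
	 * vals[2] = "v2", i.e. the values in the order they were written.
	 */
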
*/ int idx; static struct section_head sh; static struct reg_head rh; static struct assign_head ah; static char *sstrdup (const char *); static struct assign *find_assign (const char *, const char *); static struct assign *find_next_assign (struct assign *); static struct section *find_section (const char *); static void dump_deviceids_pci (void); static void dump_deviceids_pcmcia (void); static void dump_deviceids_usb (void); static void dump_pci_id (const char *); static void dump_pcmcia_id (const char *); static void dump_usb_id (const char *); static void dump_regvals (void); static void dump_paramreg (const struct section *, const struct reg *, int); static FILE *ofp; int inf_parse (FILE *fp, FILE *outfp) { TAILQ_INIT(&sh); TAILQ_INIT(&rh); TAILQ_INIT(&ah); ofp = outfp; yyin = fp; yyparse(); dump_deviceids_pci(); dump_deviceids_pcmcia(); dump_deviceids_usb(); fprintf(outfp, "#ifdef NDIS_REGVALS\n"); dump_regvals(); fprintf(outfp, "#endif /* NDIS_REGVALS */\n"); return (0); } void section_add (const char *s) { struct section *sec; sec = malloc(sizeof(struct section)); bzero(sec, sizeof(struct section)); sec->name = s; TAILQ_INSERT_TAIL(&sh, sec, link); return; } static struct assign * find_assign (const char *s, const char *k) { struct assign *assign; char newkey[256]; /* Deal with string section lookups. */ if (k != NULL && k[0] == '%') { bzero(newkey, sizeof(newkey)); strncpy(newkey, k + 1, strlen(k) - 2); k = newkey; } TAILQ_FOREACH(assign, &ah, link) { if (strcasecmp(assign->section->name, s) == 0) { if (k == NULL) return(assign); else if (strcasecmp(assign->key, k) == 0) return(assign); } } return(NULL); } static struct assign * find_next_assign (struct assign *a) { struct assign *assign; TAILQ_FOREACH(assign, &ah, link) { if (assign == a) break; } assign = assign->link.tqe_next; if (assign == NULL || assign->section != a->section) return(NULL); return (assign); } static const char * stringcvt(const char *s) { struct assign *manf; manf = find_assign("strings", s); if (manf == NULL) return(s); return(manf->vals[0]); } struct section * find_section (const char *s) { struct section *section; TAILQ_FOREACH(section, &sh, link) { if (strcasecmp(section->name, s) == 0) return(section); } return(NULL); } static void dump_pcmcia_id(const char *s) { char *manstr, *devstr; char *p0, *p; p0 = __DECONST(char *, s); p = strchr(p0, '\\'); if (p == NULL) return; p0 = p + 1; p = strchr(p0, '-'); if (p == NULL) return; *p = '\0'; manstr = p0; /* Convert any underscores to spaces. */ while (*p0 != '\0') { if (*p0 == '_') *p0 = ' '; p0++; } p0 = p + 1; p = strchr(p0, '-'); if (p == NULL) return; *p = '\0'; devstr = p0; /* Convert any underscores to spaces. 
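find_assign() and stringcvt() above implement INF's %token% string indirection. A worked example, under an assumed [strings] section:

	/*
	 * Assumed INF fragment:
	 *
	 *   [Manufacturer]
	 *   %V% = RTL8139, NTx86
	 *
	 *   [Strings]
	 *   V = "Realtek"
	 *
	 * find_assign() strips the surrounding percent signs from the key
	 * it is given, so find_assign("strings", "%V%") matches the key
	 * "V"; stringcvt("V") resolves through [strings] to "Realtek",
	 * and returns its argument unchanged when no entry exists.
	 */
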
*/ while (*p0 != '\0') { if (*p0 == '_') *p0 = ' '; p0++; } fprintf(ofp, "\t\\\n\t{ \"%s\", \"%s\", ", manstr, devstr); return; } static void dump_pci_id(const char *s) { char *p; char vidstr[7], didstr[7], subsysstr[14]; p = strcasestr(s, "VEN_"); if (p == NULL) return; p += 4; strcpy(vidstr, "0x"); strncat(vidstr, p, 4); p = strcasestr(s, "DEV_"); if (p == NULL) return; p += 4; strcpy(didstr, "0x"); strncat(didstr, p, 4); if (p == NULL) return; p = strcasestr(s, "SUBSYS_"); if (p == NULL) strcpy(subsysstr, "0x00000000"); else { p += 7; strcpy(subsysstr, "0x"); strncat(subsysstr, p, 8); } fprintf(ofp, "\t\\\n\t{ %s, %s, %s, ", vidstr, didstr, subsysstr); return; } static void dump_usb_id(const char *s) { char *p; char vidstr[7], pidstr[7]; p = strcasestr(s, "VID_"); if (p == NULL) return; p += 4; strcpy(vidstr, "0x"); strncat(vidstr, p, 4); p = strcasestr(s, "PID_"); if (p == NULL) return; p += 4; strcpy(pidstr, "0x"); strncat(pidstr, p, 4); if (p == NULL) return; fprintf(ofp, "\t\\\n\t{ %s, %s, ", vidstr, pidstr); } static void dump_deviceids_pci() { struct assign *manf, *dev; struct section *sec; struct assign *assign; char xpsec[256]; int first = 1, found = 0; /* Find manufacturer name */ manf = find_assign("Manufacturer", NULL); nextmanf: /* Find manufacturer section */ if (manf->vals[1] != NULL && (strcasecmp(manf->vals[1], "NT.5.1") == 0 || strcasecmp(manf->vals[1], "NTx86") == 0 || strcasecmp(manf->vals[1], "NTx86.5.1") == 0 || strcasecmp(manf->vals[1], "NTamd64") == 0)) { /* Handle Windows XP INF files. */ snprintf(xpsec, sizeof(xpsec), "%s.%s", manf->vals[0], manf->vals[1]); sec = find_section(xpsec); } else sec = find_section(manf->vals[0]); /* See if there are any PCI device definitions. */ TAILQ_FOREACH(assign, &ah, link) { if (assign->section == sec) { dev = find_assign("strings", assign->key); if (strcasestr(assign->vals[1], "PCI") != NULL) { found++; break; } } } if (found == 0) goto done; found = 0; if (first == 1) { /* Emit start of PCI device table */ fprintf (ofp, "#define NDIS_PCI_DEV_TABLE"); first = 0; } retry: /* * Now run through all the device names listed * in the manufacturer section and dump out the * device descriptions and vendor/device IDs. */ TAILQ_FOREACH(assign, &ah, link) { if (assign->section == sec) { dev = find_assign("strings", assign->key); /* Emit device IDs. */ if (strcasestr(assign->vals[1], "PCI") != NULL) dump_pci_id(assign->vals[1]); else continue; /* Emit device description */ fprintf (ofp, "\t\\\n\t\"%s\" },", dev->vals[0]); found++; } } /* Someone tried to fool us. Shame on them. */ if (!found) { found++; sec = find_section(manf->vals[0]); goto retry; } /* Handle Manufacturer sections with multiple entries. */ manf = find_next_assign(manf); if (manf != NULL) goto nextmanf; done: /* Emit end of table */ fprintf(ofp, "\n\n"); return; } static void dump_deviceids_pcmcia() { struct assign *manf, *dev; struct section *sec; struct assign *assign; char xpsec[256]; int first = 1, found = 0; /* Find manufacturer name */ manf = find_assign("Manufacturer", NULL); nextmanf: /* Find manufacturer section */ if (manf->vals[1] != NULL && (strcasecmp(manf->vals[1], "NT.5.1") == 0 || strcasecmp(manf->vals[1], "NTx86") == 0 || strcasecmp(manf->vals[1], "NTx86.5.1") == 0 || strcasecmp(manf->vals[1], "NTamd64") == 0)) { /* Handle Windows XP INF files. */ snprintf(xpsec, sizeof(xpsec), "%s.%s", manf->vals[0], manf->vals[1]); sec = find_section(xpsec); } else sec = find_section(manf->vals[0]); /* See if there are any PCMCIA device definitions. 
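A worked example for dump_pci_id() above, using the well-known Realtek RTL8139 ID as the assumed input:

	/*
	 * Input:  PCI\VEN_10EC&DEV_8139&SUBSYS_813910EC
	 *
	 *   vidstr    = "0x10EC"      four hex digits after VEN_
	 *   didstr    = "0x8139"      four hex digits after DEV_
	 *   subsysstr = "0x813910EC"  eight digits after SUBSYS_, or
	 *                             "0x00000000" when SUBSYS_ is absent
	 *
	 * which is emitted as "{ 0x10EC, 0x8139, 0x813910EC, " followed
	 * by the device description string supplied by the caller.
	 */
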
*/ TAILQ_FOREACH(assign, &ah, link) { if (assign->section == sec) { dev = find_assign("strings", assign->key); if (strcasestr(assign->vals[1], "PCMCIA") != NULL) { found++; break; } } } if (found == 0) goto done; found = 0; if (first == 1) { /* Emit start of PCMCIA device table */ fprintf (ofp, "#define NDIS_PCMCIA_DEV_TABLE"); first = 0; } retry: /* * Now run through all the device names listed * in the manufacturer section and dump out the * device descriptions and vendor/device IDs. */ TAILQ_FOREACH(assign, &ah, link) { if (assign->section == sec) { dev = find_assign("strings", assign->key); /* Emit device IDs. */ if (strcasestr(assign->vals[1], "PCMCIA") != NULL) dump_pcmcia_id(assign->vals[1]); else continue; /* Emit device description */ fprintf (ofp, "\t\\\n\t\"%s\" },", dev->vals[0]); found++; } } /* Someone tried to fool us. Shame on them. */ if (!found) { found++; sec = find_section(manf->vals[0]); goto retry; } /* Handle Manufacturer sections with multiple entries. */ manf = find_next_assign(manf); if (manf != NULL) goto nextmanf; done: /* Emit end of table */ fprintf(ofp, "\n\n"); return; } static void dump_deviceids_usb() { struct assign *manf, *dev; struct section *sec; struct assign *assign; char xpsec[256]; int first = 1, found = 0; /* Find manufacturer name */ manf = find_assign("Manufacturer", NULL); nextmanf: /* Find manufacturer section */ if (manf->vals[1] != NULL && (strcasecmp(manf->vals[1], "NT.5.1") == 0 || strcasecmp(manf->vals[1], "NTx86") == 0 || strcasecmp(manf->vals[1], "NTx86.5.1") == 0 || strcasecmp(manf->vals[1], "NTamd64") == 0)) { /* Handle Windows XP INF files. */ snprintf(xpsec, sizeof(xpsec), "%s.%s", manf->vals[0], manf->vals[1]); sec = find_section(xpsec); } else sec = find_section(manf->vals[0]); /* See if there are any USB device definitions. */ TAILQ_FOREACH(assign, &ah, link) { if (assign->section == sec) { dev = find_assign("strings", assign->key); if (strcasestr(assign->vals[1], "USB") != NULL) { found++; break; } } } if (found == 0) goto done; found = 0; if (first == 1) { /* Emit start of USB device table */ fprintf (ofp, "#define NDIS_USB_DEV_TABLE"); first = 0; } retry: /* * Now run through all the device names listed * in the manufacturer section and dump out the * device descriptions and vendor/device IDs. */ TAILQ_FOREACH(assign, &ah, link) { if (assign->section == sec) { dev = find_assign("strings", assign->key); /* Emit device IDs. */ if (strcasestr(assign->vals[1], "USB") != NULL) dump_usb_id(assign->vals[1]); else continue; /* Emit device description */ fprintf (ofp, "\t\\\n\t\"%s\" },", dev->vals[0]); found++; } } /* Someone tried to fool us. Shame on them. */ if (!found) { found++; sec = find_section(manf->vals[0]); goto retry; } /* Handle Manufacturer sections with multiple entries. */ manf = find_next_assign(manf); if (manf != NULL) goto nextmanf; done: /* Emit end of table */ fprintf(ofp, "\n\n"); return; } static void dump_addreg(const char *s, int devidx) { struct section *sec; struct reg *reg; /* Find the addreg section */ sec = find_section(s); /* Dump all the keys defined in it. */ TAILQ_FOREACH(reg, &rh, link) { /* * Keys with an empty subkey are very easy to parse, * so just deal with them here. If a parameter key * of the same name also exists, prefer that one and * skip this one. */ if (reg->section == sec) { if (reg->subkey == NULL) { fprintf(ofp, "\n\t{ \"%s\",", reg->key); fprintf(ofp,"\n\t\"%s \",", reg->key); fprintf(ofp, "\n\t{ \"%s\" }, %d },", reg->value == NULL ? 
"" : stringcvt(reg->value), devidx); } else if (strncasecmp(reg->subkey, "Ndi\\params", strlen("Ndi\\params")-1) == 0 && (reg->key != NULL && strcasecmp(reg->key, "ParamDesc") == 0)) dump_paramreg(sec, reg, devidx); } } return; } static void dump_enumreg(const struct section *s, const struct reg *r) { struct reg *reg; char enumkey[256]; sprintf(enumkey, "%s\\enum", r->subkey); TAILQ_FOREACH(reg, &rh, link) { if (reg->section != s) continue; if (reg->subkey == NULL || strcasecmp(reg->subkey, enumkey)) continue; fprintf(ofp, " [%s=%s]", reg->key, stringcvt(reg->value)); } return; } static void dump_editreg(const struct section *s, const struct reg *r) { struct reg *reg; TAILQ_FOREACH(reg, &rh, link) { if (reg->section != s) continue; if (reg->subkey == NULL || strcasecmp(reg->subkey, r->subkey)) continue; if (reg->key == NULL) continue; if (strcasecmp(reg->key, "LimitText") == 0) fprintf(ofp, " [maxchars=%s]", reg->value); if (strcasecmp(reg->key, "Optional") == 0 && strcmp(reg->value, "1") == 0) fprintf(ofp, " [optional]"); } return; } /* Use this for int too */ static void dump_dwordreg(const struct section *s, const struct reg *r) { struct reg *reg; TAILQ_FOREACH(reg, &rh, link) { if (reg->section != s) continue; if (reg->subkey == NULL || strcasecmp(reg->subkey, r->subkey)) continue; if (reg->key == NULL) continue; if (strcasecmp(reg->key, "min") == 0) fprintf(ofp, " [min=%s]", reg->value); if (strcasecmp(reg->key, "max") == 0) fprintf(ofp, " [max=%s]", reg->value); } return; } static void dump_defaultinfo(const struct section *s, const struct reg *r, int devidx) { struct reg *reg; TAILQ_FOREACH(reg, &rh, link) { if (reg->section != s) continue; if (reg->subkey == NULL || strcasecmp(reg->subkey, r->subkey)) continue; if (reg->key == NULL || strcasecmp(reg->key, "Default")) continue; fprintf(ofp, "\n\t{ \"%s\" }, %d },", reg->value == NULL ? "" : stringcvt(reg->value), devidx); return; } /* Default registry entry missing */ fprintf(ofp, "\n\t{ \"\" }, %d },", devidx); return; } static void dump_paramdesc(const struct section *s, const struct reg *r) { struct reg *reg; TAILQ_FOREACH(reg, &rh, link) { if (reg->section != s) continue; if (reg->subkey == NULL || strcasecmp(reg->subkey, r->subkey)) continue; if (reg->key == NULL || strcasecmp(reg->key, "ParamDesc")) continue; fprintf(ofp, "\n\t\"%s", stringcvt(r->value)); break; } return; } static void dump_typeinfo(const struct section *s, const struct reg *r) { struct reg *reg; TAILQ_FOREACH(reg, &rh, link) { if (reg->section != s) continue; if (reg->subkey == NULL || strcasecmp(reg->subkey, r->subkey)) continue; if (reg->key == NULL) continue; if (strcasecmp(reg->key, "type")) continue; if (strcasecmp(reg->value, "dword") == 0 || strcasecmp(reg->value, "int") == 0) dump_dwordreg(s, r); if (strcasecmp(reg->value, "enum") == 0) dump_enumreg(s, r); if (strcasecmp(reg->value, "edit") == 0) dump_editreg(s, r); } return; } static void dump_paramreg(const struct section *s, const struct reg *r, int devidx) { const char *keyname; keyname = r->subkey + strlen("Ndi\\params\\"); fprintf(ofp, "\n\t{ \"%s\",", keyname); dump_paramdesc(s, r); dump_typeinfo(s, r); fprintf(ofp, "\","); dump_defaultinfo(s, r, devidx); return; } static void dump_regvals(void) { struct assign *manf, *dev; struct section *sec; struct assign *assign; char sname[256]; int found = 0, i, is_winxp = 0, is_winnt = 0, devidx = 0; /* Find signature to check for special case of WinNT. 
*/ assign = find_assign("version", "signature"); if (strcasecmp(assign->vals[0], "$windows nt$") == 0) is_winnt++; /* Emit start of block */ fprintf (ofp, "ndis_cfg ndis_regvals[] = {"); /* Find manufacturer name */ manf = find_assign("Manufacturer", NULL); nextmanf: /* Find manufacturer section */ if (manf->vals[1] != NULL && (strcasecmp(manf->vals[1], "NT.5.1") == 0 || strcasecmp(manf->vals[1], "NTx86") == 0 || strcasecmp(manf->vals[1], "NTx86.5.1") == 0 || strcasecmp(manf->vals[1], "NTamd64") == 0)) { is_winxp++; /* Handle Windows XP INF files. */ snprintf(sname, sizeof(sname), "%s.%s", manf->vals[0], manf->vals[1]); sec = find_section(sname); } else sec = find_section(manf->vals[0]); retry: TAILQ_FOREACH(assign, &ah, link) { if (assign->section == sec) { found++; /* * Find all the AddReg sections. * Look for section names with .NT, unless * this is a WinXP .INF file. */ if (is_winxp) { sprintf(sname, "%s.NTx86", assign->vals[0]); dev = find_assign(sname, "AddReg"); if (dev == NULL) { sprintf(sname, "%s.NT", assign->vals[0]); dev = find_assign(sname, "AddReg"); } if (dev == NULL) dev = find_assign(assign->vals[0], "AddReg"); } else { sprintf(sname, "%s.NT", assign->vals[0]); dev = find_assign(sname, "AddReg"); if (dev == NULL && is_winnt) dev = find_assign(assign->vals[0], "AddReg"); } /* Section not found. */ if (dev == NULL) continue; for (i = 0; i < W_MAX; i++) { if (dev->vals[i] != NULL) dump_addreg(dev->vals[i], devidx); } devidx++; } } if (!found) { sec = find_section(manf->vals[0]); is_winxp = 0; found++; goto retry; } manf = find_next_assign(manf); if (manf != NULL) goto nextmanf; fprintf(ofp, "\n\t{ NULL, NULL, { 0 }, 0 }\n};\n\n"); return; } void assign_add (const char *a) { struct assign *assign; int i; assign = malloc(sizeof(struct assign)); bzero(assign, sizeof(struct assign)); assign->section = TAILQ_LAST(&sh, section_head); assign->key = sstrdup(a); for (i = 0; i < idx; i++) assign->vals[(idx - 1) - i] = sstrdup(words[i]); TAILQ_INSERT_TAIL(&ah, assign, link); clear_words(); return; } void define_add (const char *d __unused) { #ifdef notdef fprintf(stderr, "define \"%s\"\n", d); #endif return; } static char * sstrdup(const char *str) { if (str != NULL && strlen(str)) return (strdup(str)); return (NULL); } static int satoi (const char *nptr) { if (nptr != NULL && strlen(nptr)) return (atoi(nptr)); return (0); } void regkey_add (const char *r) { struct reg *reg; reg = malloc(sizeof(struct reg)); bzero(reg, sizeof(struct reg)); reg->section = TAILQ_LAST(&sh, section_head); reg->root = sstrdup(r); reg->subkey = sstrdup(words[3]); reg->key = sstrdup(words[2]); reg->flags = satoi(words[1]); reg->value = sstrdup(words[0]); TAILQ_INSERT_TAIL(&rh, reg, link); free(__DECONST(char *, r)); clear_words(); return; } void push_word (const char *w) { + + if (idx == W_MAX) { + fprintf(stderr, "too many words; try bumping W_MAX in inf.h\n"); + exit(1); + } + if (w && strlen(w)) words[idx++] = w; else words[idx++] = NULL; return; } void clear_words (void) { int i; for (i = 0; i < idx; i++) { if (words[i]) { free(__DECONST(char *, words[i])); } } idx = 0; bzero(words, sizeof(words)); return; } Index: projects/clang370-import/usr.sbin/ndiscvt/inf.h =================================================================== --- projects/clang370-import/usr.sbin/ndiscvt/inf.h (revision 288125) +++ projects/clang370-import/usr.sbin/ndiscvt/inf.h (revision 288126) @@ -1,61 +1,61 @@ /* * $Id: inf.h,v 1.3 2003/11/30 21:58:16 winter Exp $ * * $FreeBSD$ */ -#define W_MAX 16 +#define W_MAX 32 struct section { 
const char * name; TAILQ_ENTRY(section) link; }; TAILQ_HEAD(section_head, section); struct assign { struct section *section; const char * key; const char * vals[W_MAX]; TAILQ_ENTRY(assign) link; }; TAILQ_HEAD(assign_head, assign); struct reg { struct section *section; const char * root; const char * subkey; const char * key; u_int flags; const char * value; TAILQ_ENTRY(reg) link; }; TAILQ_HEAD(reg_head, reg); #define FLG_ADDREG_TYPE_SZ 0x00000000 #define FLG_ADDREG_BINVALUETYPE 0x00000001 #define FLG_ADDREG_NOCLOBBER 0x00000002 #define FLG_ADDREG_DELVAL 0x00000004 #define FLG_ADDREG_APPEND 0x00000008 #define FLG_ADDREG_KEYONLY 0x00000010 #define FLG_ADDREG_OVERWRITEONLY 0x00000020 #define FLG_ADDREG_64BITKEY 0x00001000 #define FLG_ADDREG_KEYONLY_COMMON 0x00002000 #define FLG_ADDREG_32BITKEY 0x00004000 #define FLG_ADDREG_TYPE_MULTI_SZ 0x00010000 #define FLG_ADDREG_TYPE_EXPAND_SZ 0x00020000 #define FLG_ADDREG_TYPE_DWORD 0x00010001 #define FLG_ADDREG_TYPE_NONE 0x00020001 extern void section_add (const char *); extern void assign_add (const char *); extern void define_add (const char *); extern void regkey_add (const char *); extern void push_word (const char *); extern void clear_words (void); extern int inf_parse (FILE *, FILE *); Index: projects/clang370-import/usr.sbin/ndiscvt =================================================================== --- projects/clang370-import/usr.sbin/ndiscvt (revision 288125) +++ projects/clang370-import/usr.sbin/ndiscvt (revision 288126) Property changes on: projects/clang370-import/usr.sbin/ndiscvt ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/usr.sbin/ndiscvt:r284644-288125 Index: projects/clang370-import =================================================================== --- projects/clang370-import (revision 288125) +++ projects/clang370-import (revision 288126) Property changes on: projects/clang370-import ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r288100-288125