diff --git a/contrib/atf/atf-c++/detail/test_helpers.cpp b/contrib/atf/atf-c++/detail/test_helpers.cpp
index d13b12467ce5..dc35da96be03 100644
--- a/contrib/atf/atf-c++/detail/test_helpers.cpp
+++ b/contrib/atf/atf-c++/detail/test_helpers.cpp
@@ -1,123 +1,123 @@
 //
 // Automated Testing Framework (atf)
 //
 // Copyright (c) 2009 The NetBSD Foundation, Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
 // are met:
 // 1. Redistributions of source code must retain the above copyright
 //    notice, this list of conditions and the following disclaimer.
 // 2. Redistributions in binary form must reproduce the above copyright
 //    notice, this list of conditions and the following disclaimer in the
 //    documentation and/or other materials provided with the distribution.
 //
 // THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND
 // CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 // INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 // IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY
 // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 // GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 // IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 // IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 
 #include <fstream>
 #include <iostream>
 #include <string>
 #include <vector>
 
 #include "../check.hpp"
 #include "../config.hpp"
 #include "../macros.hpp"
 
 #include "fs.hpp"
 #include "process.hpp"
 #include "test_helpers.hpp"
 
 void
 build_check_cxx_o_aux(const atf::fs::path& sfile, const char* failmsg,
                       const bool expect_pass)
 {
     std::vector< std::string > optargs;
     optargs.push_back("-I" + atf::config::get("atf_includedir"));
     optargs.push_back("-Wall");
     optargs.push_back("-Werror");
 
     const bool result = atf::check::build_cxx_o(
         sfile.str(), "test.o", atf::process::argv_array(optargs));
     if ((expect_pass && !result) || (!expect_pass && result))
         ATF_FAIL(failmsg);
 }
 
 void
 build_check_cxx_o(const atf::tests::tc& tc, const char* sfile,
                   const char* failmsg, const bool expect_pass)
 {
     const atf::fs::path sfilepath =
         atf::fs::path(tc.get_config_var("srcdir")) / sfile;
     build_check_cxx_o_aux(sfilepath, failmsg, expect_pass);
 }
 
 void
 header_check(const char *hdrname)
 {
-    std::ofstream srcfile("test.c");
+    std::ofstream srcfile("test.cpp");
     ATF_REQUIRE(srcfile);
     srcfile << "#include <" << hdrname << ">\n";
     srcfile.close();
 
     const std::string failmsg = std::string("Header check failed; ") +
         hdrname + " is not self-contained";
-    build_check_cxx_o_aux(atf::fs::path("test.c"), failmsg.c_str(), true);
+    build_check_cxx_o_aux(atf::fs::path("test.cpp"), failmsg.c_str(), true);
 }
 
 atf::fs::path
 get_process_helpers_path(const atf::tests::tc& tc, bool is_detail)
 {
     if (is_detail)
         return atf::fs::path(tc.get_config_var("srcdir")) /
             ".." / ".." / "atf-c" / "detail" / "process_helpers";
     else
         return atf::fs::path(tc.get_config_var("srcdir")) /
             ".." / "atf-c" / "detail" / "process_helpers";
 }
 
 void
 test_helpers_detail::check_equal(const char* expected[],
                                  const string_vector& actual)
 {
     const char** expected_iter = expected;
     string_vector::const_iterator actual_iter = actual.begin();
 
     bool equals = true;
     while (equals && *expected_iter != NULL && actual_iter != actual.end()) {
         if (*expected_iter != *actual_iter) {
             equals = false;
         } else {
             expected_iter++;
             actual_iter++;
         }
     }
     if (equals && ((*expected_iter == NULL && actual_iter != actual.end()) ||
                    (*expected_iter != NULL && actual_iter == actual.end())))
         equals = false;
 
     if (!equals) {
         std::cerr << "EXPECTED:\n";
         for (expected_iter = expected; *expected_iter != NULL; expected_iter++)
             std::cerr << *expected_iter << "\n";
 
         std::cerr << "ACTUAL:\n";
         for (actual_iter = actual.begin(); actual_iter != actual.end();
              actual_iter++)
             std::cerr << *actual_iter << "\n";
 
         ATF_FAIL("Expected results differ to actual values");
     }
 }
diff --git a/sbin/geom/class/part/geom_part.c b/sbin/geom/class/part/geom_part.c
index 604e65970a27..ee4b2a939f82 100644
--- a/sbin/geom/class/part/geom_part.c
+++ b/sbin/geom/class/part/geom_part.c
@@ -1,1309 +1,1320 @@
 /*-
  * Copyright (c) 2007, 2008 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "core/geom.h" #include "misc/subr.h" #ifdef STATIC_GEOM_CLASSES #define PUBSYM(x) gpart_##x #else #define PUBSYM(x) x #endif uint32_t PUBSYM(lib_version) = G_LIB_VERSION; uint32_t PUBSYM(version) = 0; static char sstart[32]; static char ssize[32]; volatile sig_atomic_t undo_restore; #define GPART_AUTOFILL "*" #define GPART_FLAGS "C" #define GPART_PARAM_BOOTCODE "bootcode" #define GPART_PARAM_INDEX "index" #define GPART_PARAM_PARTCODE "partcode" static struct gclass *find_class(struct gmesh *, const char *); static struct ggeom * find_geom(struct gclass *, const char *); static const char *find_geomcfg(struct ggeom *, const char *); static const char *find_provcfg(struct gprovider *, const char *); static struct gprovider *find_provider(struct ggeom *, off_t); static const char *fmtsize(int64_t); static int gpart_autofill(struct gctl_req *); static int gpart_autofill_resize(struct gctl_req *); static void gpart_bootcode(struct gctl_req *, unsigned int); static void *gpart_bootfile_read(const char *, ssize_t *); static void gpart_issue(struct gctl_req *, unsigned int); static void gpart_show(struct gctl_req *, unsigned int); static void gpart_show_geom(struct ggeom *, const char *, int); static int gpart_show_hasopt(struct gctl_req *, const char *, const char *); static void gpart_write_partcode(struct ggeom *, int, void *, ssize_t); static void gpart_write_partcode_vtoc8(struct ggeom *, int, void *); static void gpart_print_error(const char *); static void gpart_backup(struct gctl_req *, unsigned int); static void gpart_restore(struct gctl_req *, unsigned int); struct g_command PUBSYM(class_commands)[] = { { "add", 0, gpart_issue, { { 'a', "alignment", GPART_AUTOFILL, G_TYPE_STRING }, { 'b', "start", GPART_AUTOFILL, G_TYPE_STRING }, { 's', "size", GPART_AUTOFILL, G_TYPE_STRING }, { 't', "type", NULL, G_TYPE_STRING }, { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER }, { 'l', "label", G_VAL_OPTIONAL, G_TYPE_STRING }, { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, G_OPT_SENTINEL }, "-t type [-a alignment] [-b start] [-s size] [-i index] " "[-l label] [-f flags] geom" }, { "backup", 0, gpart_backup, G_NULL_OPTS, "geom" }, { "bootcode", 0, gpart_bootcode, { { 'b', GPART_PARAM_BOOTCODE, G_VAL_OPTIONAL, G_TYPE_STRING }, { 'p', GPART_PARAM_PARTCODE, G_VAL_OPTIONAL, G_TYPE_STRING }, { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER }, { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, G_OPT_SENTINEL }, "[-b bootcode] [-p partcode -i index] [-f flags] geom" }, { "commit", 0, gpart_issue, G_NULL_OPTS, "geom" }, { "create", 0, gpart_issue, { { 's', "scheme", NULL, G_TYPE_STRING }, { 'n', "entries", G_VAL_OPTIONAL, G_TYPE_NUMBER }, { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, G_OPT_SENTINEL }, "-s scheme [-n entries] [-f flags] provider" }, { "delete", 0, gpart_issue, { { 'i', GPART_PARAM_INDEX, NULL, G_TYPE_NUMBER }, { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, G_OPT_SENTINEL }, "-i index [-f flags] geom" }, { "destroy", 0, gpart_issue, { { 'F', "force", NULL, G_TYPE_BOOL }, { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, G_OPT_SENTINEL }, "[-F] [-f flags] geom" }, { "modify", 0, gpart_issue, { { 'i', GPART_PARAM_INDEX, NULL, G_TYPE_NUMBER }, { 'l', "label", G_VAL_OPTIONAL, G_TYPE_STRING }, { 't', "type", G_VAL_OPTIONAL, G_TYPE_STRING }, { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, 
G_OPT_SENTINEL }, "-i index [-l label] [-t type] [-f flags] geom" }, { "set", 0, gpart_issue, { { 'a', "attrib", NULL, G_TYPE_STRING }, { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER }, { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, G_OPT_SENTINEL }, "-a attrib [-i index] [-f flags] geom" }, { "show", 0, gpart_show, { { 'l', "show_label", NULL, G_TYPE_BOOL }, { 'r', "show_rawtype", NULL, G_TYPE_BOOL }, { 'p', "show_providers", NULL, G_TYPE_BOOL }, G_OPT_SENTINEL }, "[-l | -r] [-p] [geom ...]" }, { "undo", 0, gpart_issue, G_NULL_OPTS, "geom" }, { "unset", 0, gpart_issue, { { 'a', "attrib", NULL, G_TYPE_STRING }, { 'i', GPART_PARAM_INDEX, G_VAL_OPTIONAL, G_TYPE_NUMBER }, { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, G_OPT_SENTINEL }, "-a attrib [-i index] [-f flags] geom" }, { "resize", 0, gpart_issue, { { 'a', "alignment", GPART_AUTOFILL, G_TYPE_STRING }, { 's', "size", GPART_AUTOFILL, G_TYPE_STRING }, { 'i', GPART_PARAM_INDEX, NULL, G_TYPE_NUMBER }, { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, G_OPT_SENTINEL }, "-i index [-a alignment] [-s size] [-f flags] geom" }, { "restore", 0, gpart_restore, { { 'F', "force", NULL, G_TYPE_BOOL }, { 'l', "restore_labels", NULL, G_TYPE_BOOL }, { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, G_OPT_SENTINEL }, "[-lF] [-f flags] provider [...]" }, { "recover", 0, gpart_issue, { { 'f', "flags", GPART_FLAGS, G_TYPE_STRING }, G_OPT_SENTINEL }, "[-f flags] geom" }, G_CMD_SENTINEL }; static struct gclass * find_class(struct gmesh *mesh, const char *name) { struct gclass *classp; LIST_FOREACH(classp, &mesh->lg_class, lg_class) { if (strcmp(classp->lg_name, name) == 0) return (classp); } return (NULL); } static struct ggeom * find_geom(struct gclass *classp, const char *name) { struct ggeom *gp; if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) name += sizeof(_PATH_DEV) - 1; LIST_FOREACH(gp, &classp->lg_geom, lg_geom) { if (strcmp(gp->lg_name, name) == 0) return (gp); } return (NULL); } static const char * find_geomcfg(struct ggeom *gp, const char *cfg) { struct gconfig *gc; LIST_FOREACH(gc, &gp->lg_config, lg_config) { if (!strcmp(gc->lg_name, cfg)) return (gc->lg_val); } return (NULL); } static const char * find_provcfg(struct gprovider *pp, const char *cfg) { struct gconfig *gc; LIST_FOREACH(gc, &pp->lg_config, lg_config) { if (!strcmp(gc->lg_name, cfg)) return (gc->lg_val); } return (NULL); } static struct gprovider * find_provider(struct ggeom *gp, off_t minsector) { struct gprovider *pp, *bestpp; const char *s; off_t sector, bestsector; bestpp = NULL; bestsector = 0; LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { s = find_provcfg(pp, "start"); sector = (off_t)strtoimax(s, NULL, 0); if (sector < minsector) continue; if (bestpp != NULL && sector >= bestsector) continue; bestpp = pp; bestsector = sector; } return (bestpp); } static const char * fmtsize(int64_t rawsz) { static char buf[5]; humanize_number(buf, sizeof(buf), rawsz, "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); return (buf); } static const char * fmtattrib(struct gprovider *pp) { static char buf[128]; struct gconfig *gc; u_int idx; buf[0] = '\0'; idx = 0; LIST_FOREACH(gc, &pp->lg_config, lg_config) { if (strcmp(gc->lg_name, "attrib") != 0) continue; idx += snprintf(buf + idx, sizeof(buf) - idx, "%s%s", (idx == 0) ? " [" : ",", gc->lg_val); } if (idx > 0) snprintf(buf + idx, sizeof(buf) - idx, "] "); return (buf); } #define ALIGNDOWN(d, a) ((d) - (d) % (a)) #define ALIGNUP(d, a) ((d) % (a) ? 
(d) - (d) % (a) + (a): (d)) static int gpart_autofill_resize(struct gctl_req *req) { struct gmesh mesh; struct gclass *cp; struct ggeom *gp; struct gprovider *pp; off_t last, size, start, new_size; off_t lba, new_lba, alignment, offset; const char *s; int error, idx, has_alignment; idx = (int)gctl_get_intmax(req, GPART_PARAM_INDEX); if (idx < 1) errx(EXIT_FAILURE, "invalid partition index"); error = geom_gettree(&mesh); if (error) return (error); s = gctl_get_ascii(req, "class"); if (s == NULL) abort(); cp = find_class(&mesh, s); if (cp == NULL) errx(EXIT_FAILURE, "Class %s not found.", s); s = gctl_get_ascii(req, "arg0"); if (s == NULL) abort(); gp = find_geom(cp, s); if (gp == NULL) errx(EXIT_FAILURE, "No such geom: %s.", s); pp = LIST_FIRST(&gp->lg_consumer)->lg_provider; if (pp == NULL) errx(EXIT_FAILURE, "Provider for geom %s not found.", s); s = gctl_get_ascii(req, "alignment"); has_alignment = (*s == '*') ? 0 : 1; alignment = 1; if (has_alignment) { error = g_parse_lba(s, pp->lg_sectorsize, &alignment); if (error) errc(EXIT_FAILURE, error, "Invalid alignment param"); if (alignment == 0) errx(EXIT_FAILURE, "Invalid alignment param"); } else { lba = pp->lg_stripesize / pp->lg_sectorsize; if (lba > 0) alignment = lba; } error = gctl_delete_param(req, "alignment"); if (error) errc(EXIT_FAILURE, error, "internal error"); s = gctl_get_ascii(req, "size"); if (*s == '*') new_size = 0; else { error = g_parse_lba(s, pp->lg_sectorsize, &new_size); if (error) errc(EXIT_FAILURE, error, "Invalid size param"); /* no autofill necessary. */ if (has_alignment == 0) goto done; } offset = (pp->lg_stripeoffset / pp->lg_sectorsize) % alignment; s = find_geomcfg(gp, "last"); if (s == NULL) errx(EXIT_FAILURE, "Final block not found for geom %s", gp->lg_name); last = (off_t)strtoimax(s, NULL, 0); LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { s = find_provcfg(pp, "index"); if (s == NULL) continue; if (atoi(s) == idx) break; } if (pp == NULL) errx(EXIT_FAILURE, "invalid partition index"); s = find_provcfg(pp, "start"); start = (off_t)strtoimax(s, NULL, 0); s = find_provcfg(pp, "end"); lba = (off_t)strtoimax(s, NULL, 0); size = lba - start + 1; pp = find_provider(gp, lba + 1); if (new_size > 0 && (new_size <= size || pp == NULL)) { /* The start offset may be not aligned, so we align the end * offset and then calculate the size. */ new_size = ALIGNDOWN(start + offset + new_size, alignment) - start - offset; goto done; } if (pp == NULL) { new_size = ALIGNDOWN(last + offset + 1, alignment) - start - offset; if (new_size < size) return (ENOSPC); } else { s = find_provcfg(pp, "start"); new_lba = (off_t)strtoimax(s, NULL, 0); /* * Is there any free space between current and * next providers? 
*/ new_lba = ALIGNDOWN(new_lba + offset, alignment) - offset; if (new_lba > lba) new_size = new_lba - start; else { geom_deletetree(&mesh); return (ENOSPC); } } done: snprintf(ssize, sizeof(ssize), "%jd", (intmax_t)new_size); gctl_change_param(req, "size", -1, ssize); geom_deletetree(&mesh); return (0); } static int gpart_autofill(struct gctl_req *req) { struct gmesh mesh; struct gclass *cp; struct ggeom *gp; struct gprovider *pp; off_t first, last, a_first; off_t size, start, a_lba; off_t lba, len, alignment, offset; uintmax_t grade; const char *s; int error, has_size, has_start, has_alignment; s = gctl_get_ascii(req, "verb"); if (strcmp(s, "resize") == 0) return gpart_autofill_resize(req); if (strcmp(s, "add") != 0) return (0); error = geom_gettree(&mesh); if (error) return (error); s = gctl_get_ascii(req, "class"); if (s == NULL) abort(); cp = find_class(&mesh, s); if (cp == NULL) errx(EXIT_FAILURE, "Class %s not found.", s); s = gctl_get_ascii(req, "arg0"); if (s == NULL) abort(); gp = find_geom(cp, s); - if (gp == NULL) - errx(EXIT_FAILURE, "No such geom: %s.", s); + if (gp == NULL) { + if (g_device_path(s) == NULL) { + errx(EXIT_FAILURE, "No such geom %s.", s); + } else { + /* + * We don't free memory allocated by g_device_path() as + * we are about to exit. + */ + errx(EXIT_FAILURE, + "No partitioning scheme found on geom %s. Create one first using 'gpart create'.", + s); + } + } pp = LIST_FIRST(&gp->lg_consumer)->lg_provider; if (pp == NULL) errx(EXIT_FAILURE, "Provider for geom %s not found.", s); s = gctl_get_ascii(req, "alignment"); has_alignment = (*s == '*') ? 0 : 1; alignment = 1; if (has_alignment) { error = g_parse_lba(s, pp->lg_sectorsize, &alignment); if (error) errc(EXIT_FAILURE, error, "Invalid alignment param"); if (alignment == 0) errx(EXIT_FAILURE, "Invalid alignment param"); } error = gctl_delete_param(req, "alignment"); if (error) errc(EXIT_FAILURE, error, "internal error"); s = gctl_get_ascii(req, "size"); has_size = (*s == '*') ? 0 : 1; size = 0; if (has_size) { error = g_parse_lba(s, pp->lg_sectorsize, &size); if (error) errc(EXIT_FAILURE, error, "Invalid size param"); } s = gctl_get_ascii(req, "start"); has_start = (*s == '*') ? 0 : 1; start = 0ULL; if (has_start) { error = g_parse_lba(s, pp->lg_sectorsize, &start); if (error) errc(EXIT_FAILURE, error, "Invalid start param"); } /* No autofill necessary. 
*/ if (has_size && has_start && !has_alignment) goto done; len = pp->lg_stripesize / pp->lg_sectorsize; if (len > 0 && !has_alignment) alignment = len; /* Adjust parameters to stripeoffset */ offset = (pp->lg_stripeoffset / pp->lg_sectorsize) % alignment; start = ALIGNUP(start + offset, alignment); if (size > alignment) size = ALIGNDOWN(size, alignment); s = find_geomcfg(gp, "first"); if (s == NULL) errx(EXIT_FAILURE, "Starting block not found for geom %s", gp->lg_name); first = (off_t)strtoimax(s, NULL, 0); s = find_geomcfg(gp, "last"); if (s == NULL) errx(EXIT_FAILURE, "Final block not found for geom %s", gp->lg_name); last = (off_t)strtoimax(s, NULL, 0); grade = ~0ULL; a_first = ALIGNUP(first + offset, alignment); last = ALIGNDOWN(last + offset, alignment); if (a_first < start) a_first = start; while ((pp = find_provider(gp, first)) != NULL) { s = find_provcfg(pp, "start"); lba = (off_t)strtoimax(s, NULL, 0); a_lba = ALIGNDOWN(lba + offset, alignment); if (first < a_lba && a_first < a_lba) { /* Free space [first, lba> */ len = a_lba - a_first; if (has_size) { if (len >= size && (uintmax_t)(len - size) < grade) { start = a_first; grade = len - size; } } else if (has_start) { if (start >= a_first && start < a_lba) { size = a_lba - start; grade = start - a_first; } } else { if (grade == ~0ULL || len > size) { start = a_first; size = len; grade = 0; } } } s = find_provcfg(pp, "end"); first = (off_t)strtoimax(s, NULL, 0) + 1; if (first > a_first) a_first = ALIGNUP(first + offset, alignment); } if (a_first <= last) { /* Free space [first-last] */ len = ALIGNDOWN(last - a_first + 1, alignment); if (has_size) { if (len >= size && (uintmax_t)(len - size) < grade) { start = a_first; grade = len - size; } } else if (has_start) { if (start >= a_first && start <= last) { size = ALIGNDOWN(last - start + 1, alignment); grade = start - a_first; } } else { if (grade == ~0ULL || len > size) { start = a_first; size = len; grade = 0; } } } if (grade == ~0ULL) { geom_deletetree(&mesh); return (ENOSPC); } start -= offset; /* Return back to real offset */ done: snprintf(ssize, sizeof(ssize), "%jd", (intmax_t)size); gctl_change_param(req, "size", -1, ssize); snprintf(sstart, sizeof(sstart), "%jd", (intmax_t)start); gctl_change_param(req, "start", -1, sstart); geom_deletetree(&mesh); return (0); } static void gpart_show_geom(struct ggeom *gp, const char *element, int show_providers) { struct gprovider *pp; const char *s, *scheme; off_t first, last, sector, end; off_t length, secsz; int idx, wblocks, wname, wmax; scheme = find_geomcfg(gp, "scheme"); if (scheme == NULL) errx(EXIT_FAILURE, "Scheme not found for geom %s", gp->lg_name); s = find_geomcfg(gp, "first"); if (s == NULL) errx(EXIT_FAILURE, "Starting block not found for geom %s", gp->lg_name); first = (off_t)strtoimax(s, NULL, 0); s = find_geomcfg(gp, "last"); if (s == NULL) errx(EXIT_FAILURE, "Final block not found for geom %s", gp->lg_name); last = (off_t)strtoimax(s, NULL, 0); wblocks = strlen(s); s = find_geomcfg(gp, "state"); if (s == NULL) errx(EXIT_FAILURE, "State not found for geom %s", gp->lg_name); if (s != NULL && *s != 'C') s = NULL; wmax = strlen(gp->lg_name); if (show_providers) { LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { wname = strlen(pp->lg_name); if (wname > wmax) wmax = wname; } } wname = wmax; pp = LIST_FIRST(&gp->lg_consumer)->lg_provider; secsz = pp->lg_sectorsize; printf("=>%*jd %*jd %*s %s (%s)%s\n", wblocks, (intmax_t)first, wblocks, (intmax_t)(last - first + 1), wname, gp->lg_name, scheme, fmtsize(pp->lg_mediasize), s ? 
" [CORRUPT]": ""); while ((pp = find_provider(gp, first)) != NULL) { s = find_provcfg(pp, "start"); sector = (off_t)strtoimax(s, NULL, 0); s = find_provcfg(pp, "end"); end = (off_t)strtoimax(s, NULL, 0); length = end - sector + 1; s = find_provcfg(pp, "index"); idx = atoi(s); if (first < sector) { printf(" %*jd %*jd %*s - free - (%s)\n", wblocks, (intmax_t)first, wblocks, (intmax_t)(sector - first), wname, "", fmtsize((sector - first) * secsz)); } if (show_providers) { printf(" %*jd %*jd %*s %s %s (%s)\n", wblocks, (intmax_t)sector, wblocks, (intmax_t)length, wname, pp->lg_name, find_provcfg(pp, element), fmtattrib(pp), fmtsize(pp->lg_mediasize)); } else printf(" %*jd %*jd %*d %s %s (%s)\n", wblocks, (intmax_t)sector, wblocks, (intmax_t)length, wname, idx, find_provcfg(pp, element), fmtattrib(pp), fmtsize(pp->lg_mediasize)); first = end + 1; } if (first <= last) { length = last - first + 1; printf(" %*jd %*jd %*s - free - (%s)\n", wblocks, (intmax_t)first, wblocks, (intmax_t)length, wname, "", fmtsize(length * secsz)); } printf("\n"); } static int gpart_show_hasopt(struct gctl_req *req, const char *opt, const char *elt) { if (!gctl_get_int(req, "%s", opt)) return (0); if (elt != NULL) errx(EXIT_FAILURE, "-l and -r are mutually exclusive"); return (1); } static void gpart_show(struct gctl_req *req, unsigned int fl __unused) { struct gmesh mesh; struct gclass *classp; struct ggeom *gp; const char *element, *name; int error, i, nargs, show_providers; element = NULL; if (gpart_show_hasopt(req, "show_label", element)) element = "label"; if (gpart_show_hasopt(req, "show_rawtype", element)) element = "rawtype"; if (element == NULL) element = "type"; name = gctl_get_ascii(req, "class"); if (name == NULL) abort(); error = geom_gettree(&mesh); if (error != 0) errc(EXIT_FAILURE, error, "Cannot get GEOM tree"); classp = find_class(&mesh, name); if (classp == NULL) { geom_deletetree(&mesh); errx(EXIT_FAILURE, "Class %s not found.", name); } show_providers = gctl_get_int(req, "show_providers"); nargs = gctl_get_int(req, "nargs"); if (nargs > 0) { for (i = 0; i < nargs; i++) { name = gctl_get_ascii(req, "arg%d", i); gp = find_geom(classp, name); if (gp != NULL) gpart_show_geom(gp, element, show_providers); else errx(EXIT_FAILURE, "No such geom: %s.", name); } } else { LIST_FOREACH(gp, &classp->lg_geom, lg_geom) { gpart_show_geom(gp, element, show_providers); } } geom_deletetree(&mesh); } static void gpart_backup(struct gctl_req *req, unsigned int fl __unused) { struct gmesh mesh; struct gclass *classp; struct gprovider *pp; struct ggeom *gp; const char *s, *scheme; off_t sector, end; off_t length; int error, i, windex, wblocks, wtype; if (gctl_get_int(req, "nargs") != 1) errx(EXIT_FAILURE, "Invalid number of arguments."); error = geom_gettree(&mesh); if (error != 0) errc(EXIT_FAILURE, error, "Cannot get GEOM tree"); s = gctl_get_ascii(req, "class"); if (s == NULL) abort(); classp = find_class(&mesh, s); if (classp == NULL) { geom_deletetree(&mesh); errx(EXIT_FAILURE, "Class %s not found.", s); } s = gctl_get_ascii(req, "arg0"); if (s == NULL) abort(); gp = find_geom(classp, s); if (gp == NULL) errx(EXIT_FAILURE, "No such geom: %s.", s); scheme = find_geomcfg(gp, "scheme"); if (scheme == NULL) abort(); pp = LIST_FIRST(&gp->lg_consumer)->lg_provider; s = find_geomcfg(gp, "last"); if (s == NULL) abort(); wblocks = strlen(s); wtype = 0; LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { s = find_provcfg(pp, "type"); i = strlen(s); if (i > wtype) wtype = i; } s = find_geomcfg(gp, "entries"); if (s == NULL) 
abort(); windex = strlen(s); printf("%s %s\n", scheme, s); LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { s = find_provcfg(pp, "start"); sector = (off_t)strtoimax(s, NULL, 0); s = find_provcfg(pp, "end"); end = (off_t)strtoimax(s, NULL, 0); length = end - sector + 1; s = find_provcfg(pp, "label"); printf("%-*s %*s %*jd %*jd %s %s\n", windex, find_provcfg(pp, "index"), wtype, find_provcfg(pp, "type"), wblocks, (intmax_t)sector, wblocks, (intmax_t)length, (s != NULL) ? s: "", fmtattrib(pp)); } geom_deletetree(&mesh); } static int skip_line(const char *p) { while (*p != '\0') { if (*p == '#') return (1); if (isspace(*p) == 0) return (0); p++; } return (1); } static void gpart_sighndl(int sig __unused) { undo_restore = 1; } static void gpart_restore(struct gctl_req *req, unsigned int fl __unused) { struct gmesh mesh; struct gclass *classp; struct gctl_req *r; struct ggeom *gp; struct sigaction si_sa; const char *s, *flags, *errstr, *label; char **ap, *argv[6], line[BUFSIZ], *pline; int error, forced, i, l, nargs, created, rl; intmax_t n; nargs = gctl_get_int(req, "nargs"); if (nargs < 1) errx(EXIT_FAILURE, "Invalid number of arguments."); forced = gctl_get_int(req, "force"); flags = gctl_get_ascii(req, "flags"); rl = gctl_get_int(req, "restore_labels"); s = gctl_get_ascii(req, "class"); if (s == NULL) abort(); error = geom_gettree(&mesh); if (error != 0) errc(EXIT_FAILURE, error, "Cannot get GEOM tree"); classp = find_class(&mesh, s); if (classp == NULL) { geom_deletetree(&mesh); errx(EXIT_FAILURE, "Class %s not found.", s); } sigemptyset(&si_sa.sa_mask); si_sa.sa_flags = 0; si_sa.sa_handler = gpart_sighndl; if (sigaction(SIGINT, &si_sa, 0) == -1) err(EXIT_FAILURE, "sigaction SIGINT"); if (forced) { /* destroy existent partition table before restore */ for (i = 0; i < nargs; i++) { s = gctl_get_ascii(req, "arg%d", i); gp = find_geom(classp, s); if (gp != NULL) { r = gctl_get_handle(); gctl_ro_param(r, "class", -1, classp->lg_name); gctl_ro_param(r, "verb", -1, "destroy"); gctl_ro_param(r, "flags", -1, "restore"); gctl_ro_param(r, "force", sizeof(forced), &forced); gctl_ro_param(r, "arg0", -1, s); errstr = gctl_issue(r); if (errstr != NULL && errstr[0] != '\0') { gpart_print_error(errstr); gctl_free(r); goto backout; } gctl_free(r); } } } created = 0; while (undo_restore == 0 && fgets(line, sizeof(line) - 1, stdin) != NULL) { /* Format of backup entries: * * [label] ['['attrib[,attrib]']'] */ pline = (char *)line; pline[strlen(line) - 1] = 0; if (skip_line(pline)) continue; for (ap = argv; (*ap = strsep(&pline, " \t")) != NULL;) if (**ap != '\0' && ++ap >= &argv[6]) break; l = ap - &argv[0]; label = pline = NULL; if (l == 1 || l == 2) { /* create table */ if (created) errx(EXIT_FAILURE, "Incorrect backup format."); if (l == 2) n = strtoimax(argv[1], NULL, 0); for (i = 0; i < nargs; i++) { s = gctl_get_ascii(req, "arg%d", i); r = gctl_get_handle(); gctl_ro_param(r, "class", -1, classp->lg_name); gctl_ro_param(r, "verb", -1, "create"); gctl_ro_param(r, "scheme", -1, argv[0]); if (l == 2) gctl_ro_param(r, "entries", sizeof(n), &n); gctl_ro_param(r, "flags", -1, "restore"); gctl_ro_param(r, "arg0", -1, s); errstr = gctl_issue(r); if (errstr != NULL && errstr[0] != '\0') { gpart_print_error(errstr); gctl_free(r); goto backout; } gctl_free(r); } created = 1; continue; } else if (l < 4 || created == 0) errx(EXIT_FAILURE, "Incorrect backup format."); else if (l == 5) { if (strchr(argv[4], '[') == NULL) label = argv[4]; else pline = argv[4]; } else if (l == 6) { label = argv[4]; pline = argv[5]; } /* 
Add partitions to each table */ for (i = 0; i < nargs; i++) { s = gctl_get_ascii(req, "arg%d", i); r = gctl_get_handle(); n = strtoimax(argv[0], NULL, 0); gctl_ro_param(r, "class", -1, classp->lg_name); gctl_ro_param(r, "verb", -1, "add"); gctl_ro_param(r, "flags", -1, "restore"); gctl_ro_param(r, GPART_PARAM_INDEX, sizeof(n), &n); gctl_ro_param(r, "type", -1, argv[1]); gctl_ro_param(r, "start", -1, argv[2]); gctl_ro_param(r, "size", -1, argv[3]); if (rl != 0 && label != NULL) gctl_ro_param(r, "label", -1, argv[4]); gctl_ro_param(r, "alignment", -1, GPART_AUTOFILL); gctl_ro_param(r, "arg0", -1, s); error = gpart_autofill(r); if (error != 0) errc(EXIT_FAILURE, error, "autofill"); errstr = gctl_issue(r); if (errstr != NULL && errstr[0] != '\0') { gpart_print_error(errstr); gctl_free(r); goto backout; } gctl_free(r); } if (pline == NULL || *pline != '[') continue; /* set attributes */ pline++; for (ap = argv; (*ap = strsep(&pline, ",]")) != NULL;) if (**ap != '\0' && ++ap >= &argv[6]) break; for (i = 0; i < nargs; i++) { l = ap - &argv[0]; s = gctl_get_ascii(req, "arg%d", i); while (l > 0) { r = gctl_get_handle(); gctl_ro_param(r, "class", -1, classp->lg_name); gctl_ro_param(r, "verb", -1, "set"); gctl_ro_param(r, "flags", -1, "restore"); gctl_ro_param(r, GPART_PARAM_INDEX, sizeof(n), &n); gctl_ro_param(r, "attrib", -1, argv[--l]); gctl_ro_param(r, "arg0", -1, s); errstr = gctl_issue(r); if (errstr != NULL && errstr[0] != '\0') { gpart_print_error(errstr); gctl_free(r); goto backout; } gctl_free(r); } } } if (undo_restore) goto backout; /* commit changes if needed */ if (strchr(flags, 'C') != NULL) { for (i = 0; i < nargs; i++) { s = gctl_get_ascii(req, "arg%d", i); r = gctl_get_handle(); gctl_ro_param(r, "class", -1, classp->lg_name); gctl_ro_param(r, "verb", -1, "commit"); gctl_ro_param(r, "arg0", -1, s); errstr = gctl_issue(r); if (errstr != NULL && errstr[0] != '\0') { gpart_print_error(errstr); gctl_free(r); goto backout; } gctl_free(r); } } gctl_free(req); geom_deletetree(&mesh); exit(EXIT_SUCCESS); backout: for (i = 0; i < nargs; i++) { s = gctl_get_ascii(req, "arg%d", i); r = gctl_get_handle(); gctl_ro_param(r, "class", -1, classp->lg_name); gctl_ro_param(r, "verb", -1, "undo"); gctl_ro_param(r, "arg0", -1, s); gctl_issue(r); gctl_free(r); } gctl_free(req); geom_deletetree(&mesh); exit(EXIT_FAILURE); } static void * gpart_bootfile_read(const char *bootfile, ssize_t *size) { struct stat sb; void *code; int fd; if (stat(bootfile, &sb) == -1) err(EXIT_FAILURE, "%s", bootfile); if (!S_ISREG(sb.st_mode)) errx(EXIT_FAILURE, "%s: not a regular file", bootfile); if (sb.st_size == 0) errx(EXIT_FAILURE, "%s: empty file", bootfile); if (*size > 0 && sb.st_size > *size) errx(EXIT_FAILURE, "%s: file too big (%zu limit)", bootfile, *size); *size = sb.st_size; fd = open(bootfile, O_RDONLY); if (fd == -1) err(EXIT_FAILURE, "%s", bootfile); code = malloc(*size); if (code == NULL) err(EXIT_FAILURE, NULL); if (read(fd, code, *size) != *size) err(EXIT_FAILURE, "%s", bootfile); close(fd); return (code); } static void gpart_write_partcode(struct ggeom *gp, int idx, void *code, ssize_t size) { char dsf[128]; struct gprovider *pp; const char *s; char *buf; off_t bsize; int fd; LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { s = find_provcfg(pp, "index"); if (s == NULL) continue; if (atoi(s) == idx) break; } if (pp != NULL) { snprintf(dsf, sizeof(dsf), "/dev/%s", pp->lg_name); fd = open(dsf, O_WRONLY); if (fd == -1) err(EXIT_FAILURE, "%s", dsf); if (lseek(fd, size, SEEK_SET) != size) errx(EXIT_FAILURE, "%s: 
not enough space", dsf); if (lseek(fd, 0, SEEK_SET) != 0) err(EXIT_FAILURE, "%s", dsf); /* * When writing to a disk device, the write must be * sector aligned and not write to any partial sectors, * so round up the buffer size to the next sector and zero it. */ bsize = (size + pp->lg_sectorsize - 1) / pp->lg_sectorsize * pp->lg_sectorsize; buf = calloc(1, bsize); if (buf == NULL) err(EXIT_FAILURE, "%s", dsf); bcopy(code, buf, size); if (write(fd, buf, bsize) != bsize) err(EXIT_FAILURE, "%s", dsf); free(buf); close(fd); } else errx(EXIT_FAILURE, "invalid partition index"); } static void gpart_write_partcode_vtoc8(struct ggeom *gp, int idx, void *code) { char dsf[128]; struct gprovider *pp; const char *s; int installed, fd; installed = 0; LIST_FOREACH(pp, &gp->lg_provider, lg_provider) { s = find_provcfg(pp, "index"); if (s == NULL) continue; if (idx != 0 && atoi(s) != idx) continue; snprintf(dsf, sizeof(dsf), "/dev/%s", pp->lg_name); if (pp->lg_sectorsize != sizeof(struct vtoc8)) errx(EXIT_FAILURE, "%s: unexpected sector " "size (%d)\n", dsf, pp->lg_sectorsize); fd = open(dsf, O_WRONLY); if (fd == -1) err(EXIT_FAILURE, "%s", dsf); if (lseek(fd, VTOC_BOOTSIZE, SEEK_SET) != VTOC_BOOTSIZE) continue; /* * We ignore the first VTOC_BOOTSIZE bytes of boot code in * order to avoid overwriting the label. */ if (lseek(fd, sizeof(struct vtoc8), SEEK_SET) != sizeof(struct vtoc8)) err(EXIT_FAILURE, "%s", dsf); if (write(fd, (caddr_t)code + sizeof(struct vtoc8), VTOC_BOOTSIZE - sizeof(struct vtoc8)) != VTOC_BOOTSIZE - sizeof(struct vtoc8)) err(EXIT_FAILURE, "%s", dsf); installed++; close(fd); if (idx != 0 && atoi(s) == idx) break; } if (installed == 0) errx(EXIT_FAILURE, "%s: no partitions", gp->lg_name); } static void gpart_bootcode(struct gctl_req *req, unsigned int fl) { struct gmesh mesh; struct gclass *classp; struct ggeom *gp; const char *s; void *bootcode, *partcode; size_t bootsize, partsize; int error, idx, vtoc8; if (gctl_has_param(req, GPART_PARAM_BOOTCODE)) { s = gctl_get_ascii(req, GPART_PARAM_BOOTCODE); bootsize = 800 * 1024; /* Arbitrary limit. */ bootcode = gpart_bootfile_read(s, &bootsize); error = gctl_change_param(req, GPART_PARAM_BOOTCODE, bootsize, bootcode); if (error) errc(EXIT_FAILURE, error, "internal error"); } else { bootcode = NULL; bootsize = 0; } s = gctl_get_ascii(req, "class"); if (s == NULL) abort(); error = geom_gettree(&mesh); if (error != 0) errc(EXIT_FAILURE, error, "Cannot get GEOM tree"); classp = find_class(&mesh, s); if (classp == NULL) { geom_deletetree(&mesh); errx(EXIT_FAILURE, "Class %s not found.", s); } if (gctl_get_int(req, "nargs") != 1) errx(EXIT_FAILURE, "Invalid number of arguments."); s = gctl_get_ascii(req, "arg0"); if (s == NULL) abort(); gp = find_geom(classp, s); if (gp == NULL) errx(EXIT_FAILURE, "No such geom: %s.", s); s = find_geomcfg(gp, "scheme"); if (s == NULL) errx(EXIT_FAILURE, "Scheme not found for geom %s", gp->lg_name); vtoc8 = 0; if (strcmp(s, "VTOC8") == 0) vtoc8 = 1; if (gctl_has_param(req, GPART_PARAM_PARTCODE)) { s = gctl_get_ascii(req, GPART_PARAM_PARTCODE); partsize = vtoc8 != 0 ? 
VTOC_BOOTSIZE : bootsize * 1024; partcode = gpart_bootfile_read(s, &partsize); error = gctl_delete_param(req, GPART_PARAM_PARTCODE); if (error) errc(EXIT_FAILURE, error, "internal error"); } else { partcode = NULL; partsize = 0; } if (gctl_has_param(req, GPART_PARAM_INDEX)) { if (partcode == NULL) errx(EXIT_FAILURE, "-i is only valid with -p"); idx = (int)gctl_get_intmax(req, GPART_PARAM_INDEX); if (idx < 1) errx(EXIT_FAILURE, "invalid partition index"); error = gctl_delete_param(req, GPART_PARAM_INDEX); if (error) errc(EXIT_FAILURE, error, "internal error"); } else idx = 0; if (partcode != NULL) { if (vtoc8 == 0) { if (idx == 0) errx(EXIT_FAILURE, "missing -i option"); gpart_write_partcode(gp, idx, partcode, partsize); } else { if (partsize != VTOC_BOOTSIZE) errx(EXIT_FAILURE, "invalid bootcode"); gpart_write_partcode_vtoc8(gp, idx, partcode); } } else if (bootcode == NULL) errx(EXIT_FAILURE, "no -b nor -p"); if (bootcode != NULL) gpart_issue(req, fl); geom_deletetree(&mesh); } static void gpart_print_error(const char *errstr) { char *errmsg; int error; error = strtol(errstr, &errmsg, 0); if (errmsg != errstr) { while (errmsg[0] == ' ') errmsg++; if (errmsg[0] != '\0') warnc(error, "%s", errmsg); else warnc(error, NULL); } else warnx("%s", errmsg); } static void gpart_issue(struct gctl_req *req, unsigned int fl __unused) { char buf[4096]; const char *errstr; int error, status; if (gctl_get_int(req, "nargs") != 1) errx(EXIT_FAILURE, "Invalid number of arguments."); (void)gctl_delete_param(req, "nargs"); /* autofill parameters (if applicable). */ error = gpart_autofill(req); if (error) { warnc(error, "autofill"); status = EXIT_FAILURE; goto done; } bzero(buf, sizeof(buf)); gctl_rw_param(req, "output", sizeof(buf), buf); errstr = gctl_issue(req); if (errstr == NULL || errstr[0] == '\0') { if (buf[0] != '\0') printf("%s", buf); status = EXIT_SUCCESS; goto done; } gpart_print_error(errstr); status = EXIT_FAILURE; done: gctl_free(req); exit(status); } diff --git a/sys/arm/arm/devmap.c b/sys/arm/arm/devmap.c index d9bdd9cac200..1c60a15f6e27 100644 --- a/sys/arm/arm/devmap.c +++ b/sys/arm/arm/devmap.c @@ -1,268 +1,310 @@ /*- * Copyright (c) 2013 Ian Lepore * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Routines for mapping device memory. 
 */
 
+#include "opt_ddb.h"
+
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 static const struct arm_devmap_entry *devmap_table;
 static boolean_t devmap_bootstrap_done = false;
 
 /*
  * The allocated-kva (akva) devmap table and metadata. Platforms can call
  * arm_devmap_add_entry() to add static device mappings to this table using
  * automatically allocated virtual addresses carved out of the top of kva space.
  * Allocation begins immediately below the ARM_VECTORS_HIGH address.
  */
 #define AKVA_DEVMAP_MAX_ENTRIES 32
 static struct arm_devmap_entry akva_devmap_entries[AKVA_DEVMAP_MAX_ENTRIES];
 static u_int akva_devmap_idx;
 static vm_offset_t akva_devmap_vaddr = ARM_VECTORS_HIGH;
 
+/*
+ * Print the contents of the static mapping table using the provided printf-like
+ * output function (which will be either printf or db_printf).
+ */
+static void
+devmap_dump_table(int (*prfunc)(const char *, ...))
+{
+	const struct arm_devmap_entry *pd;
+
+	if (devmap_table == NULL || devmap_table[0].pd_size == 0) {
+		prfunc("No static device mappings.\n");
+		return;
+	}
+
+	prfunc("Static device mappings:\n");
+	for (pd = devmap_table; pd->pd_size != 0; ++pd) {
+		prfunc("  0x%08x - 0x%08x mapped at VA 0x%08x\n",
+		    pd->pd_pa, pd->pd_pa + pd->pd_size - 1, pd->pd_va);
+	}
+}
+
+/*
+ * Print the contents of the static mapping table. Used for bootverbose.
+ */
+void
+arm_devmap_print_table()
+{
+	devmap_dump_table(printf);
+}
+
 /*
  * Return the "last" kva address used by the registered devmap table. It's
  * actually the lowest address used by the static mappings, i.e., the address of
  * the first unusable byte of KVA.
  */
 vm_offset_t
 arm_devmap_lastaddr()
 {
 	const struct arm_devmap_entry *pd;
 	vm_offset_t lowaddr;
 
 	if (akva_devmap_idx > 0)
 		return (akva_devmap_vaddr);
 
 	lowaddr = ARM_VECTORS_HIGH;
 	for (pd = devmap_table; pd != NULL && pd->pd_size != 0; ++pd) {
 		if (lowaddr > pd->pd_va)
 			lowaddr = pd->pd_va;
 	}
 
 	return (lowaddr);
 }
 
 /*
  * Add an entry to the internal "akva" static devmap table using the given
  * physical address and size and a virtual address allocated from the top of
  * kva. This automatically registers the akva table on the first call, so all a
  * platform has to do is call this routine to install as many mappings as it
  * needs and when initarm() calls arm_devmap_bootstrap() it will pick up all the
  * entries in the akva table automatically.
  */
 void
 arm_devmap_add_entry(vm_paddr_t pa, vm_size_t sz)
 {
 	struct arm_devmap_entry *m;
 
 	if (devmap_bootstrap_done)
 		panic("arm_devmap_add_entry() after arm_devmap_bootstrap()");
 
 	if (akva_devmap_idx == (AKVA_DEVMAP_MAX_ENTRIES - 1))
 		panic("AKVA_DEVMAP_MAX_ENTRIES is too small");
 
 	if (akva_devmap_idx == 0)
 		arm_devmap_register_table(akva_devmap_entries);
 
 	/*
 	 * Allocate virtual address space from the top of kva downwards. If the
 	 * range being mapped is aligned and sized to 1MB boundaries then also
 	 * align the virtual address to the next-lower 1MB boundary so that we
 	 * end up with a nice efficient section mapping.
 	 */
 	if ((pa & 0x000fffff) == 0 && (sz & 0x000fffff) == 0) {
 		akva_devmap_vaddr = trunc_1mpage(akva_devmap_vaddr - sz);
 	} else {
 		akva_devmap_vaddr = trunc_page(akva_devmap_vaddr - sz);
 	}
 	m = &akva_devmap_entries[akva_devmap_idx++];
 	m->pd_va = akva_devmap_vaddr;
 	m->pd_pa = pa;
 	m->pd_size = sz;
 	m->pd_prot = VM_PROT_READ | VM_PROT_WRITE;
 	m->pd_cache = PTE_DEVICE;
 }
 
 /*
  * Register the given table as the one to use in arm_devmap_bootstrap().
 */
 void
 arm_devmap_register_table(const struct arm_devmap_entry *table)
 {
 	devmap_table = table;
 }
 
 /*
  * Map all of the static regions in the devmap table, and remember the devmap
  * table so the mapdev, ptov, and vtop functions can do lookups later.
  *
  * If a non-NULL table pointer is given it is used unconditionally, otherwise
  * the previously-registered table is used. This smooths transition from legacy
  * code that fills in a local table then calls this function passing that table,
  * and newer code that uses arm_devmap_register_table() in platform-specific
  * code, then lets the common initarm() call this function with a NULL pointer.
  */
 void
 arm_devmap_bootstrap(vm_offset_t l1pt, const struct arm_devmap_entry *table)
 {
 	const struct arm_devmap_entry *pd;
 
 	devmap_bootstrap_done = true;
 
 	/*
 	 * If given a table pointer, use it. Otherwise, if a table was
 	 * previously registered, use it. Otherwise, no work to do.
 	 */
 	if (table != NULL)
 		devmap_table = table;
 	else if (devmap_table == NULL)
 		return;
 
 	for (pd = devmap_table; pd->pd_size != 0; ++pd) {
 		pmap_map_chunk(l1pt, pd->pd_va, pd->pd_pa, pd->pd_size,
 		    pd->pd_prot,pd->pd_cache);
 	}
 }
 
 /*
  * Look up the given physical address in the static mapping data and return the
  * corresponding virtual address, or NULL if not found.
  */
 void *
 arm_devmap_ptov(vm_paddr_t pa, vm_size_t size)
 {
 	const struct arm_devmap_entry *pd;
 
 	if (devmap_table == NULL)
 		return (NULL);
 
 	for (pd = devmap_table; pd->pd_size != 0; ++pd) {
 		if (pa >= pd->pd_pa && pa + size <= pd->pd_pa + pd->pd_size)
 			return ((void *)(pd->pd_va + (pa - pd->pd_pa)));
 	}
 
 	return (NULL);
 }
 
 /*
  * Look up the given virtual address in the static mapping data and return the
  * corresponding physical address, or DEVMAP_PADDR_NOTFOUND if not found.
  */
 vm_paddr_t
 arm_devmap_vtop(void * vpva, vm_size_t size)
 {
 	const struct arm_devmap_entry *pd;
 	vm_offset_t va;
 
 	if (devmap_table == NULL)
 		return (DEVMAP_PADDR_NOTFOUND);
 
 	va = (vm_offset_t)vpva;
 	for (pd = devmap_table; pd->pd_size != 0; ++pd) {
 		if (va >= pd->pd_va && va + size <= pd->pd_va + pd->pd_size)
 			return ((vm_paddr_t)(pd->pd_pa + (va - pd->pd_va)));
 	}
 
 	return (DEVMAP_PADDR_NOTFOUND);
 }
 
 /*
  * Map a set of physical memory pages into the kernel virtual address space.
  * Return a pointer to where it is mapped.
  *
  * This uses a pre-established static mapping if one exists for the requested
  * range, otherwise it allocates kva space and maps the physical pages into it.
  *
  * This routine is intended to be used for mapping device memory, NOT real
  * memory; the mapping type is inherently PTE_DEVICE in pmap_kenter_device().
  */
 void *
 pmap_mapdev(vm_offset_t pa, vm_size_t size)
 {
 	vm_offset_t va, tmpva, offset;
 	void * rva;
 
 	/* First look in the static mapping table. */
 	if ((rva = arm_devmap_ptov(pa, size)) != NULL)
 		return (rva);
 
 	offset = pa & PAGE_MASK;
 	pa = trunc_page(pa);
 	size = round_page(size + offset);
 
 	va = kva_alloc(size);
 	if (!va)
 		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 
 	for (tmpva = va; size > 0;) {
 		pmap_kenter_device(tmpva, pa);
 		size -= PAGE_SIZE;
 		tmpva += PAGE_SIZE;
 		pa += PAGE_SIZE;
 	}
 
 	return ((void *)(va + offset));
 }
 
 /*
  * Unmap device memory and free the kva space.
  */
 void
 pmap_unmapdev(vm_offset_t va, vm_size_t size)
 {
 	vm_offset_t tmpva, offset;
 	vm_size_t origsize;
 
 	/* Nothing to do if we find the mapping in the static table.
*/ if (arm_devmap_vtop((void*)va, size) != DEVMAP_PADDR_NOTFOUND) return; origsize = size; offset = va & PAGE_MASK; va = trunc_page(va); size = round_page(size + offset); for (tmpva = va; size > 0;) { pmap_kremove(tmpva); size -= PAGE_SIZE; tmpva += PAGE_SIZE; } kva_free(va, origsize); } +#ifdef DDB +#include + +DB_SHOW_COMMAND(devmap, db_show_devmap) +{ + devmap_dump_table(db_printf); +} + +#endif /* DDB */ + diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c index c48933d2c68c..1a85d60d1378 100644 --- a/sys/arm/arm/machdep.c +++ b/sys/arm/arm/machdep.c @@ -1,1518 +1,1521 @@ /* $NetBSD: arm32_machdep.c,v 1.44 2004/03/24 15:34:47 atatat Exp $ */ /*- * Copyright (c) 2004 Olivier Houchard * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Brinicombe * for the NetBSD Project. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Machine dependant functions for kernel setup * * Created : 17/09/94 * Updated : 18/04/01 updated for new wscons */ #include "opt_compat.h" #include "opt_ddb.h" #include "opt_platform.h" #include "opt_sched.h" #include "opt_timer.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) 
#endif struct pcpu __pcpu[MAXCPU]; struct pcpu *pcpup = &__pcpu[0]; static struct trapframe proc0_tf; uint32_t cpu_reset_address = 0; int cold = 1; vm_offset_t vector_page; long realmem = 0; int (*_arm_memcpy)(void *, void *, int, int) = NULL; int (*_arm_bzero)(void *, int, int) = NULL; int _min_memcpy_size = 0; int _min_bzero_size = 0; extern int *end; #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif #ifdef FDT /* * This is the number of L2 page tables required for covering max * (hypothetical) memsize of 4GB and all kernel mappings (vectors, msgbuf, * stacks etc.), uprounded to be divisible by 4. */ #define KERNEL_PT_MAX 78 static struct pv_addr kernel_pt_table[KERNEL_PT_MAX]; vm_paddr_t phys_avail[10]; vm_paddr_t dump_avail[4]; extern u_int data_abort_handler_address; extern u_int prefetch_abort_handler_address; extern u_int undefined_handler_address; vm_paddr_t pmap_pa; struct pv_addr systempage; static struct pv_addr msgbufpv; struct pv_addr irqstack; struct pv_addr undstack; struct pv_addr abtstack; static struct pv_addr kernelstack; #endif #if defined(LINUX_BOOT_ABI) #define LBABI_MAX_BANKS 10 uint32_t board_id; struct arm_lbabi_tag *atag_list; char linux_command_line[LBABI_MAX_COMMAND_LINE + 1]; char atags[LBABI_MAX_COMMAND_LINE * 2]; uint32_t memstart[LBABI_MAX_BANKS]; uint32_t memsize[LBABI_MAX_BANKS]; uint32_t membanks; #endif static uint32_t board_revision; /* hex representation of uint64_t */ static char board_serial[32]; SYSCTL_NODE(_hw, OID_AUTO, board, CTLFLAG_RD, 0, "Board attributes"); SYSCTL_UINT(_hw_board, OID_AUTO, revision, CTLFLAG_RD, &board_revision, 0, "Board revision"); SYSCTL_STRING(_hw_board, OID_AUTO, serial, CTLFLAG_RD, board_serial, 0, "Board serial"); int vfp_exists; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &vfp_exists, 0, "Floating point support enabled"); void board_set_serial(uint64_t serial) { snprintf(board_serial, sizeof(board_serial)-1, "%016jx", serial); } void board_set_revision(uint32_t revision) { board_revision = revision; } void sendsig(catcher, ksi, mask) sig_t catcher; ksiginfo_t *ksi; sigset_t *mask; { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; int onstack; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_usr_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct sigframe *)td->td_frame->tf_usr_sp; /* make room on the stack */ fp--; /* make the stack aligned */ fp = (struct sigframe *)STACKALIGN(fp); /* Populate the siginfo frame. */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK ) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. 
*/ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* * Build context to run handler in. We invoke the handler * directly, only returning via the trampoline. Note the * trampoline version numbers are coordinated with machine- * dependent code in libc. */ tf->tf_r0 = sig; tf->tf_r1 = (register_t)&fp->sf_si; tf->tf_r2 = (register_t)&fp->sf_uc; /* the trampoline uses r5 as the uc address */ tf->tf_r5 = (register_t)&fp->sf_uc; tf->tf_pc = (register_t)catcher; tf->tf_usr_sp = (register_t)fp; tf->tf_usr_lr = (register_t)(PS_STRINGS - *(p->p_sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_usr_lr, tf->tf_usr_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } struct kva_md_info kmi; /* * arm32_vector_init: * * Initialize the vector page, and select whether or not to * relocate the vectors. * * NOTE: We expect the vector page to be mapped at its expected * destination. */ extern unsigned int page0[], page0_data[]; void arm_vector_init(vm_offset_t va, int which) { unsigned int *vectors = (int *) va; unsigned int *vectors_data = vectors + (page0_data - page0); int vec; /* * Loop through the vectors we're taking over, and copy the * vector's insn and data word. */ for (vec = 0; vec < ARM_NVEC; vec++) { if ((which & (1 << vec)) == 0) { /* Don't want to take over this vector. */ continue; } vectors[vec] = page0[vec]; vectors_data[vec] = page0_data[vec]; } /* Now sync the vectors. */ cpu_icache_sync_range(va, (ARM_NVEC * 2) * sizeof(u_int)); vector_page = va; if (va == ARM_VECTORS_HIGH) { /* * Assume the MD caller knows what it's doing here, and * really does want the vector page relocated. * * Note: This has to be done here (and not just in * cpu_setup()) because the vector page needs to be * accessible *before* cpu_startup() is called. * Think ddb(9) ... * * NOTE: If the CPU control register is not readable, * this will totally fail! We'll just assume that * any system that has high vector support has a * readable CPU control register, for now. If we * ever encounter one that does not, we'll have to * rethink this. */ cpu_control(CPU_CONTROL_VECRELOC, CPU_CONTROL_VECRELOC); } } static void cpu_startup(void *dummy) { struct pcb *pcb = thread0.td_pcb; #ifdef ARM_TP_ADDRESS #ifndef ARM_CACHE_LOCK_ENABLE vm_page_t m; #endif #endif identify_arm_cpu(); printf("real memory = %ju (%ju MB)\n", (uintmax_t)ptoa(physmem), (uintmax_t)ptoa(physmem) / 1048576); realmem = physmem; /* * Display the RAM layout. 
*/ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; - printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n", + printf(" 0x%08jx - 0x%08jx, %ju KBytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, - (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); + (uintmax_t)size / 1024, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", (uintmax_t)ptoa(cnt.v_free_count), (uintmax_t)ptoa(cnt.v_free_count) / 1048576); + if (bootverbose) + arm_devmap_print_table(); + bufinit(); vm_pager_bufferinit(); pcb->un_32.pcb32_und_sp = (u_int)thread0.td_kstack + USPACE_UNDEF_STACK_TOP; pcb->un_32.pcb32_sp = (u_int)thread0.td_kstack + USPACE_SVC_STACK_TOP; vector_page_setprot(VM_PROT_READ); pmap_set_pcb_pagedir(pmap_kernel(), pcb); pmap_postinit(); #ifdef ARM_TP_ADDRESS #ifdef ARM_CACHE_LOCK_ENABLE pmap_kenter_user(ARM_TP_ADDRESS, ARM_TP_ADDRESS); arm_lock_cache_line(ARM_TP_ADDRESS); #else m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_ZERO); pmap_kenter_user(ARM_TP_ADDRESS, VM_PAGE_TO_PHYS(m)); #endif *(uint32_t *)ARM_RAS_START = 0; *(uint32_t *)ARM_RAS_END = 0xffffffff; #endif } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { cpu_dcache_wb_range((uintptr_t)ptr, len); cpu_l2cache_wb_range((uintptr_t)ptr, len); } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } void cpu_idle(int busy) { CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); #ifndef NO_EVENTTIMERS if (!busy) { critical_enter(); cpu_idleclock(); } #endif if (!sched_runnable()) cpu_sleep(0); #ifndef NO_EVENTTIMERS if (!busy) { cpu_activeclock(); critical_exit(); } #endif CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(&tf->tf_r0, regs->r, sizeof(regs->r)); regs->r_sp = tf->tf_usr_sp; regs->r_lr = tf->tf_usr_lr; regs->r_pc = tf->tf_pc; regs->r_cpsr = tf->tf_spsr; return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { bzero(regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(regs->r, &tf->tf_r0, sizeof(regs->r)); tf->tf_usr_sp = regs->r_sp; tf->tf_usr_lr = regs->r_lr; tf->tf_pc = regs->r_pc; tf->tf_spsr &= ~PSR_FLAGS; tf->tf_spsr |= regs->r_cpsr & PSR_FLAGS; return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { return (0); } int set_dbregs(struct thread *td, struct dbreg *regs) { return (0); } static int ptrace_read_int(struct thread *td, vm_offset_t addr, u_int32_t *v) { struct iovec iov; struct uio uio; PROC_LOCK_ASSERT(td->td_proc, MA_NOTOWNED); iov.iov_base = (caddr_t) v; iov.iov_len = sizeof(u_int32_t); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)addr; uio.uio_resid = sizeof(u_int32_t); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; return proc_rwmem(td->td_proc, &uio); } static int ptrace_write_int(struct thread *td, vm_offset_t addr, u_int32_t v) { struct iovec iov; struct uio uio; PROC_LOCK_ASSERT(td->td_proc, MA_NOTOWNED); 
iov.iov_base = (caddr_t) &v; iov.iov_len = sizeof(u_int32_t); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)addr; uio.uio_resid = sizeof(u_int32_t); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_td = td; return proc_rwmem(td->td_proc, &uio); } int ptrace_single_step(struct thread *td) { struct proc *p; int error; KASSERT(td->td_md.md_ptrace_instr == 0, ("Didn't clear single step")); p = td->td_proc; PROC_UNLOCK(p); error = ptrace_read_int(td, td->td_frame->tf_pc + 4, &td->td_md.md_ptrace_instr); if (error) goto out; error = ptrace_write_int(td, td->td_frame->tf_pc + 4, PTRACE_BREAKPOINT); if (error) td->td_md.md_ptrace_instr = 0; td->td_md.md_ptrace_addr = td->td_frame->tf_pc + 4; out: PROC_LOCK(p); return (error); } int ptrace_clear_single_step(struct thread *td) { struct proc *p; if (td->td_md.md_ptrace_instr) { p = td->td_proc; PROC_UNLOCK(p); ptrace_write_int(td, td->td_md.md_ptrace_addr, td->td_md.md_ptrace_instr); PROC_LOCK(p); td->td_md.md_ptrace_instr = 0; } return (0); } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_pc = addr; return (0); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; register_t cspr; td = curthread; if (td->td_md.md_spinlock_count == 0) { cspr = disable_interrupts(I32_bit | F32_bit); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_cspr = cspr; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t cspr; td = curthread; critical_exit(); cspr = td->td_md.md_saved_cspr; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) restore_interrupts(cspr); } /* * Clear registers on exec */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(*tf)); tf->tf_usr_sp = stack; tf->tf_usr_lr = imgp->entry_addr; tf->tf_svc_lr = 0x77777777; tf->tf_pc = imgp->entry_addr; tf->tf_spsr = PSR_USR32_MODE; } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; __greg_t *gr = mcp->__gregs; if (clear_ret & GET_MC_CLEAR_RET) gr[_REG_R0] = 0; else gr[_REG_R0] = tf->tf_r0; gr[_REG_R1] = tf->tf_r1; gr[_REG_R2] = tf->tf_r2; gr[_REG_R3] = tf->tf_r3; gr[_REG_R4] = tf->tf_r4; gr[_REG_R5] = tf->tf_r5; gr[_REG_R6] = tf->tf_r6; gr[_REG_R7] = tf->tf_r7; gr[_REG_R8] = tf->tf_r8; gr[_REG_R9] = tf->tf_r9; gr[_REG_R10] = tf->tf_r10; gr[_REG_R11] = tf->tf_r11; gr[_REG_R12] = tf->tf_r12; gr[_REG_SP] = tf->tf_usr_sp; gr[_REG_LR] = tf->tf_usr_lr; gr[_REG_PC] = tf->tf_pc; gr[_REG_CPSR] = tf->tf_spsr; return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. 
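 * (The mention of a 'cs selector' is x86 heritage; the nearest ARM
 * equivalent is the CPSR, and sys_sigreturn() below refuses any
 * context whose CPSR mode or interrupt-disable bits were tampered
 * with before it calls set_mcontext().)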
*/ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tf = td->td_frame; const __greg_t *gr = mcp->__gregs; tf->tf_r0 = gr[_REG_R0]; tf->tf_r1 = gr[_REG_R1]; tf->tf_r2 = gr[_REG_R2]; tf->tf_r3 = gr[_REG_R3]; tf->tf_r4 = gr[_REG_R4]; tf->tf_r5 = gr[_REG_R5]; tf->tf_r6 = gr[_REG_R6]; tf->tf_r7 = gr[_REG_R7]; tf->tf_r8 = gr[_REG_R8]; tf->tf_r9 = gr[_REG_R9]; tf->tf_r10 = gr[_REG_R10]; tf->tf_r11 = gr[_REG_R11]; tf->tf_r12 = gr[_REG_R12]; tf->tf_usr_sp = gr[_REG_SP]; tf->tf_usr_lr = gr[_REG_LR]; tf->tf_pc = gr[_REG_PC]; tf->tf_spsr = gr[_REG_CPSR]; return (0); } /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { struct sigframe sf; struct trapframe *tf; int spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &sf, sizeof(sf))) return (EFAULT); /* * Make sure the processor mode has not been tampered with and * interrupts have not been disabled. */ spsr = sf.sf_uc.uc_mcontext.__gregs[_REG_CPSR]; if ((spsr & PSR_MODE) != PSR_USR32_MODE || (spsr & (I32_bit | F32_bit)) != 0) return (EINVAL); /* Restore register context. */ tf = td->td_frame; set_mcontext(td, &sf.sf_uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &sf.sf_uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->un_32.pcb32_r8 = tf->tf_r8; pcb->un_32.pcb32_r9 = tf->tf_r9; pcb->un_32.pcb32_r10 = tf->tf_r10; pcb->un_32.pcb32_r11 = tf->tf_r11; pcb->un_32.pcb32_r12 = tf->tf_r12; pcb->un_32.pcb32_pc = tf->tf_pc; pcb->un_32.pcb32_lr = tf->tf_usr_lr; pcb->un_32.pcb32_sp = tf->tf_usr_sp; } /* * Make a standard dump_avail array. Can't make the phys_avail * since we need to do that after we call pmap_bootstrap, but this * is needed before pmap_boostrap. * * ARM_USE_SMALL_ALLOC uses dump_avail, so it must be filled before * calling pmap_bootstrap. */ void arm_dump_avail_init(vm_offset_t ramsize, size_t max) { #ifdef LINUX_BOOT_ABI /* * Linux boot loader passes us the actual banks of memory, so use them * to construct the dump_avail array. 
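 * For example (illustrative, not from any real board), two 256MB
 * banks at 0x00000000 and 0x20000000 would produce
 *
 *	dump_avail[] = { 0x00000000, 0x10000000,
 *			 0x20000000, 0x30000000, 0, 0 };
 *
 * i.e. page-rounded start/end pairs terminated by a pair of zeroes.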
*/ if (membanks > 0) { int i, j; if (max < (membanks + 1) * 2) panic("dump_avail[%d] too small for %d banks\n", max, membanks); for (j = 0, i = 0; i < membanks; i++) { dump_avail[j++] = round_page(memstart[i]); dump_avail[j++] = trunc_page(memstart[i] + memsize[i]); } dump_avail[j++] = 0; dump_avail[j++] = 0; return; } #endif if (max < 4) panic("dump_avail too small\n"); dump_avail[0] = round_page(PHYSADDR); dump_avail[1] = trunc_page(PHYSADDR + ramsize); dump_avail[2] = 0; dump_avail[3] = 0; } /* * Fake up a boot descriptor table */ vm_offset_t fake_preload_metadata(struct arm_boot_params *abp __unused) { #ifdef DDB vm_offset_t zstart = 0, zend = 0; #endif vm_offset_t lastaddr; int i = 0; static uint32_t fake_preload[35]; fake_preload[i++] = MODINFO_NAME; fake_preload[i++] = strlen("kernel") + 1; strcpy((char*)&fake_preload[i++], "kernel"); i += 1; fake_preload[i++] = MODINFO_TYPE; fake_preload[i++] = strlen("elf kernel") + 1; strcpy((char*)&fake_preload[i++], "elf kernel"); i += 2; fake_preload[i++] = MODINFO_ADDR; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = KERNVIRTADDR; fake_preload[i++] = MODINFO_SIZE; fake_preload[i++] = sizeof(uint32_t); fake_preload[i++] = (uint32_t)&end - KERNVIRTADDR; #ifdef DDB if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) { fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4); fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8); lastaddr = *(uint32_t *)(KERNVIRTADDR + 8); zend = lastaddr; zstart = *(uint32_t *)(KERNVIRTADDR + 4); ksym_start = zstart; ksym_end = zend; } else #endif lastaddr = (vm_offset_t)&end; fake_preload[i++] = 0; fake_preload[i] = 0; preload_metadata = (void *)fake_preload; return (lastaddr); } void pcpu0_init(void) { #if ARM_ARCH_6 || ARM_ARCH_7A || defined(CPU_MV_PJ4B) set_pcpu(pcpup); #endif pcpu_init(pcpup, 0, sizeof(struct pcpu)); PCPU_SET(curthread, &thread0); #ifdef VFP PCPU_SET(cpu, 0); #endif } #if defined(LINUX_BOOT_ABI) vm_offset_t linux_parse_boot_param(struct arm_boot_params *abp) { struct arm_lbabi_tag *walker; uint32_t revision; uint64_t serial; /* * Linux boot ABI: r0 = 0, r1 is the board type (!= 0) and r2 * is atags or dtb pointer. If all of these aren't satisfied, * then punt. */ if (!(abp->abp_r0 == 0 && abp->abp_r1 != 0 && abp->abp_r2 != 0)) return 0; board_id = abp->abp_r1; walker = (struct arm_lbabi_tag *) (abp->abp_r2 + KERNVIRTADDR - KERNPHYSADDR); /* xxx - Need to also look for binary device tree */ if (ATAG_TAG(walker) != ATAG_CORE) return 0; atag_list = walker; while (ATAG_TAG(walker) != ATAG_NONE) { switch (ATAG_TAG(walker)) { case ATAG_CORE: break; case ATAG_MEM: if (membanks < LBABI_MAX_BANKS) { memstart[membanks] = walker->u.tag_mem.start; memsize[membanks] = walker->u.tag_mem.size; } membanks++; break; case ATAG_INITRD2: break; case ATAG_SERIAL: serial = walker->u.tag_sn.low | ((uint64_t)walker->u.tag_sn.high << 32); board_set_serial(serial); break; case ATAG_REVISION: revision = walker->u.tag_rev.rev; board_set_revision(revision); break; case ATAG_CMDLINE: /* XXX open question: Parse this for boothowto? 
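 * (A sketch of such parsing, not implemented here, would scan the
 * copied string for loader-style flags and OR the matching RB_* bits
 * from <sys/reboot.h> into boothowto, e.g. "-s" -> RB_SINGLE and
 * "-v" -> RB_VERBOSE.)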
*/ bcopy(walker->u.tag_cmd.command, linux_command_line, ATAG_SIZE(walker)); break; default: break; } walker = ATAG_NEXT(walker); } /* Save a copy for later */ bcopy(atag_list, atags, (char *)walker - (char *)atag_list + ATAG_SIZE(walker)); return fake_preload_metadata(abp); } #endif #if defined(FREEBSD_BOOT_LOADER) vm_offset_t freebsd_parse_boot_param(struct arm_boot_params *abp) { vm_offset_t lastaddr = 0; void *mdp; void *kmdp; /* * Mask metadata pointer: it is supposed to be on a page boundary. If * the first argument (mdp) doesn't point to a valid address the * bootloader must have passed us something other than the metadata * ptr, so we give up. Also give up if we cannot find the metadata * section the loader creates, which is where we get all this data from. */ if ((mdp = (void *)(abp->abp_r0 & ~PAGE_MASK)) == NULL) return 0; preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) return 0; boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); #endif preload_addr_relocate = KERNVIRTADDR - KERNPHYSADDR; return lastaddr; } #endif vm_offset_t default_parse_boot_param(struct arm_boot_params *abp) { vm_offset_t lastaddr; #if defined(LINUX_BOOT_ABI) if ((lastaddr = linux_parse_boot_param(abp)) != 0) return lastaddr; #endif #if defined(FREEBSD_BOOT_LOADER) if ((lastaddr = freebsd_parse_boot_param(abp)) != 0) return lastaddr; #endif /* Fall back to hardcoded metadata. */ lastaddr = fake_preload_metadata(abp); return lastaddr; } /* * Stub version of the boot parameter parsing routine. We are * called early in initarm, before even VM has been initialized. * This routine needs to preserve any data that the boot loader * has passed in before the kernel starts to grow past the end * of the BSS, traditionally the place boot-loaders put this data. * * Since this is called so early, things that depend on the vm system * being set up (including access to some SoCs' serial ports) cannot be * used; about all that can be done in this routine is to copy the * arguments. * * This is the default boot parameter parsing routine. Individual * kernels/boards can override this weak function with one of their * own. We just fake metadata...
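 * A board override is simply a strong definition of the same symbol;
 * a minimal sketch (hypothetical board code; 'boot_r0' is an invented
 * variable standing in for wherever the board stashes loader data):
 *
 *	static register_t boot_r0;
 *
 *	vm_offset_t
 *	parse_boot_param(struct arm_boot_params *abp)
 *	{
 *		boot_r0 = abp->abp_r0;
 *		return (fake_preload_metadata(abp));
 *	}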
*/ __weak_reference(default_parse_boot_param, parse_boot_param); /* * Initialize proc0 */ void init_proc0(vm_offset_t kstack) { proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; thread0.td_pcb->pcb_flags = 0; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } void set_stackptrs(int cpu) { set_stackptr(PSR_IRQ32_MODE, irqstack.pv_va + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_ABT32_MODE, abtstack.pv_va + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_UND32_MODE, undstack.pv_va + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); } #ifdef FDT static char * kenv_next(char *cp) { if (cp != NULL) { while (*cp != 0) cp++; cp++; if (*cp == 0) cp = NULL; } return (cp); } static void print_kenv(void) { int len; char *cp; debugf("loader passed (static) kenv:\n"); if (kern_envp == NULL) { debugf(" no env, null ptr\n"); return; } debugf(" kern_envp = 0x%08x\n", (uint32_t)kern_envp); len = 0; for (cp = kern_envp; cp != NULL; cp = kenv_next(cp)) debugf(" %x %s\n", (uint32_t)cp, cp); } static void physmap_init(struct mem_region *availmem_regions, int availmem_regions_sz) { int i, j, cnt; vm_offset_t phys_kernelend, kernload; uint32_t s, e, sz; struct mem_region *mp, *mp1; phys_kernelend = KERNPHYSADDR + (virtual_avail - KERNVIRTADDR); kernload = KERNPHYSADDR; /* * Remove kernel physical address range from avail * regions list. Page align all regions. * Non-page aligned memory isn't very interesting to us. * Also, sort the entries for ascending addresses. */ sz = 0; cnt = availmem_regions_sz; debugf("processing avail regions:\n"); for (mp = availmem_regions; mp->mr_size; mp++) { s = mp->mr_start; e = mp->mr_start + mp->mr_size; debugf(" %08x-%08x -> ", s, e); /* Check whether this region holds all of the kernel. */ if (s < kernload && e > phys_kernelend) { availmem_regions[cnt].mr_start = phys_kernelend; availmem_regions[cnt++].mr_size = e - phys_kernelend; e = kernload; } /* Check whether this region starts within the kernel. */ if (s >= kernload && s < phys_kernelend) { if (e <= phys_kernelend) goto empty; s = phys_kernelend; } /* Now check whether this region ends within the kernel. */ if (e > kernload && e <= phys_kernelend) { if (s >= kernload) { goto empty; } e = kernload; } /* Now page align the start and size of the region. */ s = round_page(s); e = trunc_page(e); if (e < s) e = s; sz = e - s; debugf("%08x-%08x = %x\n", s, e, sz); /* Check whether some memory is left here. */ if (sz == 0) { empty: printf("skipping\n"); bcopy(mp + 1, mp, (cnt - (mp - availmem_regions)) * sizeof(*mp)); cnt--; mp--; continue; } /* Do an insertion sort. */ for (mp1 = availmem_regions; mp1 < mp; mp1++) if (s < mp1->mr_start) break; if (mp1 < mp) { bcopy(mp1, mp1 + 1, (char *)mp - (char *)mp1); mp1->mr_start = s; mp1->mr_size = sz; } else { mp->mr_start = s; mp->mr_size = sz; } } availmem_regions_sz = cnt; /* Fill in phys_avail table, based on availmem_regions */ debugf("fill in phys_avail:\n"); for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { debugf(" region: 0x%08x - 0x%08x (0x%08x)\n", availmem_regions[i].mr_start, availmem_regions[i].mr_start + availmem_regions[i].mr_size, availmem_regions[i].mr_size); /* * We must not map the page at PA 0x00000000: the VM can't * handle it, because pmap_extract() == 0 means failure.
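 * The loop below therefore steals the first page from any region that
 * starts at physical zero.  Illustratively, a region covering
 * 0x00000000-0x08000000 lands in phys_avail[] as the pair
 * { PAGE_SIZE, 0x08000000 - PAGE_SIZE }: note that trimming 'size'
 * shortens the end of the region by a page as well.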
*/ if (availmem_regions[i].mr_start > 0 || availmem_regions[i].mr_size > PAGE_SIZE) { vm_size_t size; phys_avail[j] = availmem_regions[i].mr_start; size = availmem_regions[i].mr_size; if (phys_avail[j] == 0) { phys_avail[j] += PAGE_SIZE; size -= PAGE_SIZE; } phys_avail[j + 1] = availmem_regions[i].mr_start + size; } else j -= 2; } phys_avail[j] = 0; phys_avail[j + 1] = 0; } void * initarm(struct arm_boot_params *abp) { struct mem_region memory_regions[FDT_MEM_REGIONS]; struct mem_region availmem_regions[FDT_MEM_REGIONS]; struct mem_region reserved_regions[FDT_MEM_REGIONS]; struct pv_addr kernel_l1pt; struct pv_addr dpcpu; vm_offset_t dtbp, freemempos, l2_start, lastaddr; uint32_t memsize, l2size; char *env; void *kmdp; u_int l1pagetable; int i = 0, j = 0, err_devmap = 0; int memory_regions_sz; int availmem_regions_sz; int reserved_regions_sz; vm_offset_t start, end; vm_offset_t rstart, rend; int curr; lastaddr = parse_boot_param(abp); memsize = 0; set_cpufuncs(); /* * Find the dtb passed in by the boot loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); else dtbp = (vm_offset_t)NULL; #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (OF_install(OFW_FDT, 0) == FALSE) while (1); if (OF_init((void *)dtbp) != 0) while (1); /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(memory_regions, &memory_regions_sz, &memsize) != 0) while(1); /* Grab physical memory regions information from device tree. */ if (fdt_get_reserved_regions(reserved_regions, &reserved_regions_sz) != 0) reserved_regions_sz = 0; /* * Now exclude all the reserved regions */ curr = 0; for (i = 0; i < memory_regions_sz; i++) { start = memory_regions[i].mr_start; end = start + memory_regions[i].mr_size; for (j = 0; j < reserved_regions_sz; j++) { rstart = reserved_regions[j].mr_start; rend = rstart + reserved_regions[j].mr_size; /* * Restricted region is before available * Skip restricted region */ if (rend <= start) continue; /* * Restricted region is behind available * No further processing required */ if (rstart >= end) break; /* * Restricted region includes memory region * skip available region */ if ((start >= rstart) && (rend >= end)) { start = rend; end = rend; break; } /* * Memory region includes restricted region */ if ((rstart > start) && (end > rend)) { availmem_regions[curr].mr_start = start; availmem_regions[curr++].mr_size = rstart - start; start = rend; break; } /* * Memory region partially overlaps with restricted */ if ((rstart >= start) && (rstart <= end)) { end = rstart; } else if ((rend >= start) && (rend <= end)) { start = rend; } } if (end > start) { availmem_regions[curr].mr_start = start; availmem_regions[curr++].mr_size = end - start; } } availmem_regions_sz = curr; /* Platform-specific initialisation */ initarm_early_init(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); /* Calculate number of L2 tables needed for mapping vm_page_array */ l2size = (memsize / PAGE_SIZE) * sizeof(struct vm_page); l2size = (l2size >> L1_S_SHIFT) + 1; /* * Add one table for end of kernel map, one for stacks, msgbuf and * L1 and L2 tables map and one for vectors map. 
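 * Worked example (hypothetical 512MB of RAM, 4KB pages, and assuming
 * sizeof(struct vm_page) == 96 purely for illustration):
 *
 *	131072 pages * 96 bytes = 12MB of vm_page_array
 *	12MB >> L1_S_SHIFT = 12, +1 = 13 L2 tables
 *
 * plus the 3 added below, rounded up to a multiple of 4, gives 16.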
*/ l2size += 3; /* Make it divisible by 4 */ l2size = (l2size + 3) & ~3; freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK; /* Define a macro to simplify memory allocation */ #define valloc_pages(var, np) \ alloc_pages((var).pv_va, (np)); \ (var).pv_pa = (var).pv_va + (KERNPHYSADDR - KERNVIRTADDR); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0) freemempos += PAGE_SIZE; valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); for (i = 0; i < l2size; ++i) { if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { valloc_pages(kernel_pt_table[i], L2_TABLE_SIZE / PAGE_SIZE); j = i; } else { kernel_pt_table[i].pv_va = kernel_pt_table[j].pv_va + L2_TABLE_SIZE_REAL * (i - j); kernel_pt_table[i].pv_pa = kernel_pt_table[i].pv_va - KERNVIRTADDR + KERNPHYSADDR; } } /* * Allocate a page for the system page mapped to 0x00000000 * or 0xffff0000. This page will just contain the system vectors * and can be shared by all processes. */ valloc_pages(systempage, 1); /* Allocate dynamic per-cpu area. */ valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu.pv_va, 0); /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU); valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU); valloc_pages(undstack, UND_STACK_SIZE * MAXCPU); valloc_pages(kernelstack, KSTACK_PAGES * MAXCPU); valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); /* * Now we start construction of the L1 page table * We start by mapping the L2 page tables into the L1. * This means that we can replace L1 mappings later on if necessary */ l1pagetable = kernel_l1pt.pv_va; /* * Try to map as much as possible of kernel text and data using * 1MB section mapping and for the rest of initial kernel address * space use L2 coarse tables. * * Link L2 tables for mapping remainder of kernel (modulo 1MB) * and kernel structures */ l2_start = lastaddr & ~(L1_S_OFFSET); for (i = 0 ; i < l2size - 1; i++) pmap_link_l2pt(l1pagetable, l2_start + i * L1_S_SIZE, &kernel_pt_table[i]); pmap_curmaxkvaddr = l2_start + (l2size - 1) * L1_S_SIZE; /* Map kernel code and data */ pmap_map_chunk(l1pagetable, KERNVIRTADDR, KERNPHYSADDR, (((uint32_t)(lastaddr) - KERNVIRTADDR) + PAGE_MASK) & ~PAGE_MASK, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map L1 directory and allocated L2 page tables */ pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); pmap_map_chunk(l1pagetable, kernel_pt_table[0].pv_va, kernel_pt_table[0].pv_pa, L2_TABLE_SIZE_REAL * l2size, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); /* Map allocated DPCPU, stacks and msgbuf */ pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, freemempos - dpcpu.pv_va, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Link and map the vector page */ pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH, &kernel_pt_table[l2size - 1]); pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, PTE_CACHE); /* Establish static device mappings. 
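 * The platform supplies these through initarm_devmap_init(); each
 * entry of its arm_devmap_entry table (see sys/arm/include/devmap.h
 * in this change) names a chosen KVA, a physical base, a size and the
 * protection/cache attributes.  An illustrative entry with invented
 * addresses:
 *
 *	static const struct arm_devmap_entry fake_devmap[] = {
 *		{ 0xfef00000, 0x48000000, 0x00100000,
 *		    VM_PROT_READ | VM_PROT_WRITE, PTE_NOCACHE },
 *		{ 0, 0, 0, 0, 0 },	(an all-zero entry terminates)
 *	};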
*/ err_devmap = initarm_devmap_init(); arm_devmap_bootstrap(l1pagetable, NULL); vm_max_kernel_address = initarm_lastaddr(); cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT); pmap_pa = kernel_l1pt.pv_pa; setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)); /* * Now that proper page tables are installed, call cpu_setup() to enable * instruction and data caches and other chip-specific features. */ cpu_setup(""); /* * Only after the SoC register block is mapped can we perform device * tree fixups, as they may attempt to read parameters from hardware. */ OF_interpret("perform-fixup", 0); initarm_gpio_init(); cninit(); physmem = memsize / PAGE_SIZE; debugf("initarm: console initialized\n"); debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); debugf(" boothowto = 0x%08x\n", boothowto); debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); print_kenv(); env = getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); if (err_devmap != 0) printf("WARNING: could not fully configure devmap, error=%d\n", err_devmap); initarm_late_init(); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE); set_stackptrs(0); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in setttb() * but since we are bootstrapping, the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). */ cpu_idcache_wbinv_all(); /* Set stack for exception handlers */ data_abort_handler_address = (u_int)data_abort_handler; prefetch_abort_handler_address = (u_int)prefetch_abort_handler; undefined_handler_address = (u_int)undefinedinstruction_bounce; undefined_init(); init_proc0(kernelstack.pv_va); arm_intrnames_init(); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); arm_dump_avail_init(memsize, sizeof(dump_avail) / sizeof(dump_avail[0])); pmap_bootstrap(freemempos, &kernel_l1pt); msgbufp = (void *)msgbufpv.pv_va; msgbufinit(msgbufp, msgbufsize); mutex_init(); /* * Prepare map of physical memory regions available to vm subsystem. */ physmap_init(availmem_regions, availmem_regions_sz); init_param2(physmem); kdb_init(); return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); } #endif diff --git a/sys/arm/include/devmap.h b/sys/arm/include/devmap.h index 028f40d7af19..e205d9b85397 100644 --- a/sys/arm/include/devmap.h +++ b/sys/arm/include/devmap.h @@ -1,90 +1,93 @@ /*- * Copyright (c) 2013 Ian Lepore * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_DEVMAP_H_ #define _MACHINE_DEVMAP_H_ /* * This structure is used by MD code to describe static mappings of devices * which are established as part of bringing up the MMU early in the boot. */ struct arm_devmap_entry { vm_offset_t pd_va; /* virtual address */ vm_paddr_t pd_pa; /* physical address */ vm_size_t pd_size; /* size of region */ vm_prot_t pd_prot; /* protection code */ int pd_cache; /* cache attributes */ }; /* * Return the lowest KVA address used in any entry in the registered devmap * table. This works with whatever table is registered, including the internal * table used by arm_devmap_add_entry() if that routine was used. Platforms can * implement initarm_lastaddr() by calling this if static device mappings are * their only use of high KVA space. */ vm_offset_t arm_devmap_lastaddr(void); /* * Automatically allocate KVA (from the top of the address space downwards) and * make static device mapping entries in an internal table. The internal table * is automatically registered on the first call to this. */ void arm_devmap_add_entry(vm_paddr_t pa, vm_size_t sz); /* * Register a platform-local table to be bootstrapped by the generic * initarm() in arm/machdep.c. This is used by newer code that allocates and * fills in its own local table but does not have its own initarm() routine. */ void arm_devmap_register_table(const struct arm_devmap_entry * _table); /* * Establish mappings for all the entries in the table. This is called * automatically from the common initarm() in arm/machdep.c, and also from the * custom initarm() routines in older code. If the table pointer is NULL, this * will use the table installed previously by arm_devmap_register_table(). */ void arm_devmap_bootstrap(vm_offset_t _l1pt, const struct arm_devmap_entry *_table); /* * Translate between virtual and physical addresses within a region that is * static-mapped by the devmap code. If the given address range isn't * static-mapped, then ptov returns NULL and vtop returns DEVMAP_PADDR_NOTFOUND. * The latter implies that you can't vtop just the last byte of physical address * space. This is not as limiting as it might sound, because even if a device * occupies the end of the physical address space, you're only prevented from * doing vtop for that single byte. If you vtop a size bigger than 1 it works. */ #define DEVMAP_PADDR_NOTFOUND ((vm_paddr_t)(-1)) void * arm_devmap_ptov(vm_paddr_t _pa, vm_size_t _sz); vm_paddr_t arm_devmap_vtop(void * _va, vm_size_t _sz); +/* Print the static mapping table; used for bootverbose output. 
*/ +void arm_devmap_print_table(void); + #endif diff --git a/sys/conf/Makefile.arm b/sys/conf/Makefile.arm index 034bc4d7dc38..eacba7bb45fa 100644 --- a/sys/conf/Makefile.arm +++ b/sys/conf/Makefile.arm @@ -1,147 +1,151 @@ # Makefile.arm -- with config changes. # Copyright 1990 W. Jolitz # from: @(#)Makefile.i386 7.1 5/10/91 # $FreeBSD$ # # Makefile for FreeBSD # # This makefile is constructed from a machine description: # config machineid # Most changes should be made in the machine description # /sys/arm/conf/``machineid'' # after which you should do # config machineid # Generic makefile changes should be made in # /sys/conf/Makefile.arm # after which config should be rerun for all machines. # # Which version of config(8) is required. %VERSREQ= 600004 STD8X16FONT?= iso .if !defined(S) .if exists(./@/.) S= ./@ .else S= ../../.. .endif .endif .include "$S/conf/kern.pre.mk" INCLUDES+= -I$S/contrib/libfdt SYSTEM_LD:= ${SYSTEM_LD:$S/conf/ldscript.$M=ldscript.$M} SYSTEM_DEP:= ${SYSTEM_DEP:$S/conf/ldscript.$M=ldscript.$M} .if !defined(DEBUG) && !defined(PROFLEVEL) STRIP_FLAGS = -S .endif -CFLAGS.gcc += -mno-thumb-interwork +.if ${COMPILER_TYPE} != "clang" +CFLAGS += -mno-thumb-interwork +.endif .if empty(DDB_ENABLED) -.if defined(WITHOUT_ARM_EABI) -CFLAGS.gcc += -mno-apcs-frame +.if defined(WITHOUT_ARM_EABI) && ${COMPILER_TYPE} != "clang" +CFLAGS += -mno-apcs-frame .endif .elif !defined(WITHOUT_ARM_EABI) CFLAGS += -funwind-tables +.if ${COMPILER_TYPE} == "clang" # clang requires us to tell it to emit assembly with unwind information -CFLAGS.clang += -mllvm -arm-enable-ehabi +CFLAGS += -mllvm -arm-enable-ehabi +.endif .endif SYSTEM_LD_ = ${LD} -Bdynamic -T ldscript.$M.noheader ${LDFLAGS} \ -warn-common -export-dynamic -dynamic-linker /red/herring -o \ ${FULLKERNEL}.noheader -X ${SYSTEM_OBJS} vers.o SYSTEM_LD_TAIL +=;sed s/" + SIZEOF_HEADERS"// ldscript.$M\ >ldscript.$M.noheader; \ ${SYSTEM_LD_}; \ ${OBJCOPY} -S -O binary ${FULLKERNEL}.noheader \ ${KERNEL_KO}.bin; \ rm ${FULLKERNEL}.noheader .if defined(MFS_IMAGE) SYSTEM_LD_TAIL += ;sh ${S}/tools/embed_mfs.sh ${KERNEL_KO}.bin ${MFS_IMAGE}; .endif FILES_CPU_FUNC = $S/$M/$M/cpufunc_asm_arm7tdmi.S \ $S/$M/$M/cpufunc_asm_arm8.S $S/$M/$M/cpufunc_asm_arm9.S \ $S/$M/$M/cpufunc_asm_sa1.S $S/$M/$M/cpufunc_asm_arm10.S \ $S/$M/$M/cpufunc_asm_xscale.S $S/$M/$M/cpufunc_asm.S \ $S/$M/$M/cpufunc_asm_xscale_c3.S $S/$M/$M/cpufunc_asm_armv5_ec.S \ $S/$M/$M/cpufunc_asm_fa526.S $S/$M/$M/cpufunc_asm_sheeva.S \ $S/$M/$M/cpufunc_asm_pj4b.S $S/$M/$M/cpufunc_asm_armv6.S \ $S/$M/$M/cpufunc_asm_armv7.S KERNEL_EXTRA=trampoline KERNEL_EXTRA_INSTALL=kernel.gz.tramp trampoline: ${KERNEL_KO}.tramp ${KERNEL_KO}.tramp: ${KERNEL_KO} $S/$M/$M/inckern.S $S/$M/$M/elf_trampoline.c echo "#define KERNNAME \"${KERNEL_KO}.tmp\"" >opt_kernname.h sed s/${KERNVIRTADDR}/${KERNPHYSADDR}/ ldscript.$M > ldscript.$M.tramp sed s/" + SIZEOF_HEADERS"// ldscript.$M.tramp > \ ldscript.$M.tramp.noheader echo "#include " >tmphack.S echo "ENTRY(_start)" >>tmphack.S echo "bl _startC" >>tmphack.S ${OBJCOPY} --strip-symbol '$$d' --strip-symbol '$$a' \ -g --strip-symbol '$$t' ${FULLKERNEL} ${KERNEL_KO}.tmp eval $$(stat -s ${KERNEL_KO}.tmp) && \ echo "#define KERNSIZE $$st_size" >>opt_kernname.h ${CC} -O -nostdlib -I. -I$S -Xlinker -T -Xlinker ldscript.$M.tramp \ tmphack.S $S/$M/$M/elf_trampoline.c $S/$M/$M/inckern.S \ ${FILES_CPU_FUNC} -o ${KERNEL_KO}.tramp ${CC} -O -nostdlib -I. 
-I$S -Xlinker -T -Xlinker \ ldscript.$M.tramp.noheader \ tmphack.S $S/$M/$M/elf_trampoline.c $S/$M/$M/inckern.S \ ${FILES_CPU_FUNC} -o ${KERNEL_KO}.tramp.noheader ${OBJCOPY} -S -O binary ${KERNEL_KO}.tramp.noheader \ ${KERNEL_KO}.tramp.bin ${OBJCOPY} ${STRIP_FLAGS} ${KERNEL_KO}.tmp echo "#define KERNNAME \"${KERNEL_KO}.tmp.gz\"" >opt_kernname.h eval $$(stat -s ${KERNEL_KO}.tmp) && \ echo "#define KERNSIZE $$st_size" >>opt_kernname.h gzip -f9 ${KERNEL_KO}.tmp eval $$(stat -s ${KERNEL_KO}.tmp.gz) && \ echo "#define KERNCOMPSIZE $$st_size" >>opt_kernname.h ${CC} -O2 -ffreestanding -DKZIP -I. -I$S -c $S/kern/inflate.c -o \ inflate-tramp.o ${CC} -O -nostdlib -I. -I$S -Xlinker -T -Xlinker ldscript.$M.tramp \ -DKZIP tmphack.S $S/$M/$M/elf_trampoline.c inflate-tramp.o \ $S/$M/$M/inckern.S ${FILES_CPU_FUNC} -o ${KERNEL_KO}.gz.tramp ${CC} -O -nostdlib -I. -I$S -Xlinker -T -Xlinker \ ldscript.$M.tramp.noheader \ -DKZIP tmphack.S $S/$M/$M/elf_trampoline.c inflate-tramp.o \ $S/$M/$M/inckern.S ${FILES_CPU_FUNC} -o ${KERNEL_KO}.tramp.noheader ${OBJCOPY} -S -O binary ${KERNEL_KO}.tramp.noheader \ ${KERNEL_KO}.gz.tramp.bin rm ${KERNEL_KO}.tmp.gz ${KERNEL_KO}.tramp.noheader opt_kernname.h \ inflate-tramp.o tmphack.S MKMODULESENV+= MACHINE=${MACHINE} %BEFORE_DEPEND %OBJS %FILES.c %FILES.s %FILES.m %CLEAN CLEAN+= ldscript.$M ${KERNEL_KO}.bin ldscript.$M.noheader CLEAN+= ${KERNEL_KO}.tramp ${KERNEL_KO}.tramp.bin ldscript.$M.tramp \ ldscript.$M.tramp.noheader ${KERNEL_KO}.gz.tramp \ ${KERNEL_KO}.gz.tramp.bin ldscript.$M: $S/conf/ldscript.$M cat $S/conf/ldscript.$M|sed s/KERNPHYSADDR/${KERNPHYSADDR}/g| \ sed s/KERNVIRTADDR/${KERNVIRTADDR}/g > ldscript.$M %RULES .include "$S/conf/kern.post.mk" diff --git a/sys/net/route.c b/sys/net/route.c index 0c54f366378e..6a8be53907c1 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -1,1733 +1,1839 @@ /*- * Copyright (c) 1980, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 * $FreeBSD$ */ /************************************************************************ * Note: In this file a 'fib' is a "forwarding information base" * * Which is the new name for an in-kernel routing (next hop) table. * ***********************************************************************/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" +#include "opt_sctp.h" #include "opt_mrouting.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #define RT_MAXFIBS UINT16_MAX /* Kernel config default option. */ #ifdef ROUTETABLES #if ROUTETABLES <= 0 #error "ROUTETABLES defined too low" #endif #if ROUTETABLES > RT_MAXFIBS #error "ROUTETABLES defined too big" #endif #define RT_NUMFIBS ROUTETABLES #endif /* ROUTETABLES */ /* Initialize to default if not otherwise set. */ #ifndef RT_NUMFIBS #define RT_NUMFIBS 1 #endif +#if defined(INET) || defined(INET6) +#ifdef SCTP +extern void sctp_addr_change(struct ifaddr *ifa, int cmd); +#endif /* SCTP */ +#endif + + /* This is read-only. */ u_int rt_numfibs = RT_NUMFIBS; SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, ""); /* and this can be set too big but will be fixed before it is used */ TUNABLE_INT("net.fibs", &rt_numfibs); /* * By default add routes to all fibs for new interfaces. * Once this is set to 0 then only allocate routes on interface * changes for the FIB of the caller when adding a new set of addresses * to an interface. XXX this is a shotgun approach to a problem that needs * a more fine-grained solution... that will come. * XXX it also has the problem of getting the FIB from curthread, which will * not always work given the fib can be overridden and prefixes can be added * from the network stack context. */ u_int rt_add_addr_allfibs = 1; SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW, &rt_add_addr_allfibs, 0, ""); TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs); VNET_DEFINE(struct rtstat, rtstat); #define V_rtstat VNET(rtstat) VNET_DEFINE(struct radix_node_head *, rt_tables); #define V_rt_tables VNET(rt_tables) VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ #define V_rttrash VNET(rttrash) /* Compare two sockaddr structures; the lengths must match before a byte comparison means anything. */ -#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) +#define sa_equal(a1, a2) (((a1)->sa_len == (a2)->sa_len) && \ + (bcmp((a1), (a2), (a1)->sa_len) == 0)) /* * Convert a 'struct radix_node *' to a 'struct rtentry *'. * The operation can be done safely (in this code) because a * 'struct rtentry' starts with two 'struct radix_node''s, the first * one representing leaf nodes in the routing tree, which is * what the code in radix.c passes us as a 'struct radix_node'. * * But because there are a lot of assumptions in this conversion, * do not cast explicitly, but always use the macro below. */ #define RNTORT(p) ((struct rtentry *)(p)) static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone.
*/ #define V_rtzone VNET(rtzone) /* * handler for net.my_fibnum */ static int sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) { int fibnum; int error; fibnum = curthread->td_proc->p_fibnum; error = sysctl_handle_int(oidp, &fibnum, 0, req); return (error); } SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); static __inline struct radix_node_head ** rt_tables_get_rnh_ptr(int table, int fam) { struct radix_node_head **rnh; KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.", __func__)); KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.", __func__)); /* rnh is [fib=0][af=0]. */ rnh = (struct radix_node_head **)V_rt_tables; /* Get the offset to the requested table and fam. */ rnh += table * (AF_MAX+1) + fam; return (rnh); } struct radix_node_head * rt_tables_get_rnh(int table, int fam) { return (*rt_tables_get_rnh_ptr(table, fam)); } /* * route initialization must occur before ip6_init2(), which happens at * SI_ORDER_MIDDLE. */ static void route_init(void) { /* whack the tunable ints into line. */ if (rt_numfibs > RT_MAXFIBS) rt_numfibs = RT_MAXFIBS; if (rt_numfibs == 0) rt_numfibs = 1; } SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); static void vnet_route_init(const void *unused __unused) { struct domain *dom; struct radix_node_head **rnh; int table; int fam; V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO); V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtattach == NULL) continue; for (table = 0; table < rt_numfibs; table++) { fam = dom->dom_family; if (table != 0 && fam != AF_INET6 && fam != AF_INET) break; /* * XXX MRT rtattach will also be called from * vfs_export.c but the offset will be 0 (only for * AF_INET and AF_INET6 which don't need it anyhow). */ rnh = rt_tables_get_rnh_ptr(table, fam); if (rnh == NULL) panic("%s: rnh NULL", __func__); dom->dom_rtattach((void **)rnh, dom->dom_rtoffset); } } } VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, vnet_route_init, 0); #ifdef VIMAGE static void vnet_route_uninit(const void *unused __unused) { int table; int fam; struct domain *dom; struct radix_node_head **rnh; for (dom = domains; dom; dom = dom->dom_next) { if (dom->dom_rtdetach == NULL) continue; for (table = 0; table < rt_numfibs; table++) { fam = dom->dom_family; if (table != 0 && fam != AF_INET6 && fam != AF_INET) break; rnh = rt_tables_get_rnh_ptr(table, fam); if (rnh == NULL) panic("%s: rnh NULL", __func__); dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset); } } free(V_rt_tables, M_RTABLE); uma_zdestroy(V_rtzone); } VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, vnet_route_uninit, 0); #endif #ifndef _SYS_SYSPROTO_H_ struct setfib_args { int fibnum; }; #endif int sys_setfib(struct thread *td, struct setfib_args *uap) { if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) return EINVAL; td->td_proc->p_fibnum = uap->fibnum; return (0); } /* * Packet routing routines.
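 * Typical consumer pattern (a sketch; 'dst_sin' stands in for a real
 * destination sockaddr_in): embed a struct route, fill in ro_dst, and
 * let rtalloc_ign() attach a referenced, unlocked rtentry, which the
 * caller must release when done:
 *
 *	struct route ro;
 *
 *	bzero(&ro, sizeof(ro));
 *	bcopy(&dst_sin, &ro.ro_dst, sizeof(dst_sin));
 *	rtalloc_ign(&ro, 0);
 *	if (ro.ro_rt != NULL) {
 *		... use ro.ro_rt->rt_ifp, rt_gateway, etc. ...
 *		RTFREE(ro.ro_rt);
 *	}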
*/ void rtalloc(struct route *ro) { rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB); } void rtalloc_fib(struct route *ro, u_int fibnum) { rtalloc_ign_fib(ro, 0UL, fibnum); } void rtalloc_ign(struct route *ro, u_long ignore) { struct rtentry *rt; if ((rt = ro->ro_rt) != NULL) { if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) return; RTFREE(rt); ro->ro_rt = NULL; } ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } void rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) { struct rtentry *rt; if ((rt = ro->ro_rt) != NULL) { if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) return; RTFREE(rt); ro->ro_rt = NULL; } ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); if (ro->ro_rt) RT_UNLOCK(ro->ro_rt); } /* * Look up the route that matches the address given * Or, at least try.. Create a cloned route if needed. * * The returned route, if any, is locked. */ struct rtentry * rtalloc1(struct sockaddr *dst, int report, u_long ignflags) { return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); } struct rtentry * rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) { struct radix_node_head *rnh; struct radix_node *rn; struct rtentry *newrt; struct rt_addrinfo info; int err = 0, msgtype = RTM_MISS; int needlock; KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We support multiple FIBs. */ break; default: fibnum = RT_DEFAULT_FIB; break; } rnh = rt_tables_get_rnh(fibnum, dst->sa_family); newrt = NULL; if (rnh == NULL) goto miss; /* * Look up the address in the table for that Address Family */ needlock = !(ignflags & RTF_RNH_LOCKED); if (needlock) RADIX_NODE_HEAD_RLOCK(rnh); #ifdef INVARIANTS else RADIX_NODE_HEAD_LOCK_ASSERT(rnh); #endif rn = rnh->rnh_matchaddr(dst, rnh); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { newrt = RNTORT(rn); RT_LOCK(newrt); RT_ADDREF(newrt); if (needlock) RADIX_NODE_HEAD_RUNLOCK(rnh); goto done; } else if (needlock) RADIX_NODE_HEAD_RUNLOCK(rnh); /* * Either we hit the root or couldn't find any match, * Which basically means * "caint get there frm here" */ miss: V_rtstat.rts_unreach++; if (report) { /* * If required, report the failure to the supervising * Authorities. * For a delete, this is not an error. (report == 0) */ bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; rt_missmsg_fib(msgtype, &info, 0, err, fibnum); } done: if (newrt) RT_LOCK_ASSERT(newrt); return (newrt); } /* * Remove a reference count from an rtentry. * If the count gets low enough, take it out of the routing table */ void rtfree(struct rtentry *rt) { struct radix_node_head *rnh; KASSERT(rt != NULL,("%s: NULL rt", __func__)); rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); RT_LOCK_ASSERT(rt); /* * The callers should use RTFREE_LOCKED() or RTFREE(), so * we should come here exactly with the last reference. */ RT_REMREF(rt); if (rt->rt_refcnt > 0) { log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); goto done; } /* * On last reference give the "close method" a chance * to cleanup private state. This also permits (for * IPv4 and IPv6) a chance to decide if the routing table * entry should be purged immediately or at a later time. * When an immediate purge is to happen the close routine * typically calls rtexpunge which clears the RTF_UP flag * on the entry so that the code below reclaims the storage. 
*/ if (rt->rt_refcnt == 0 && rnh->rnh_close) rnh->rnh_close((struct radix_node *)rt, rnh); /* * If we are no longer "up" (and ref == 0) * then we can free the resources associated * with the route. */ if ((rt->rt_flags & RTF_UP) == 0) { if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic("rtfree 2"); /* * the rtentry must have been removed from the routing table * so it is represented in rttrash; remove that now. */ V_rttrash--; #ifdef DIAGNOSTIC if (rt->rt_refcnt < 0) { printf("rtfree: %p not freed (neg refs)\n", rt); goto done; } #endif /* * release references on items we hold, * e.g. other routes and ifaddrs. */ if (rt->rt_ifa) ifa_free(rt->rt_ifa); /* * The key is separately alloc'd so free it (see rt_setgate()). * This also frees the gateway, as they are always malloc'd * together. */ Free(rt_key(rt)); /* * and the rtentry itself of course */ RT_LOCK_DESTROY(rt); uma_zfree(V_rtzone, rt); return; } done: RT_UNLOCK(rt); } /* * Force a routing table entry to the specified * destination to go through the given gateway. * Normally called as a result of a routing redirect * message from the network layer. */ void rtredirect(struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct sockaddr *src) { rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); } void rtredirect_fib(struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct sockaddr *src, u_int fibnum) { struct rtentry *rt, *rt0 = NULL; int error = 0; short *stat = NULL; struct rt_addrinfo info; struct ifaddr *ifa; struct radix_node_head *rnh; ifa = NULL; rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) { error = EAFNOSUPPORT; goto out; } /* verify the gateway is directly reachable */ if ((ifa = ifa_ifwithnet(gateway, 0)) == NULL) { error = ENETUNREACH; goto out; } rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ /* * If the redirect isn't from our current router for this dst, * it's either old or wrong. If it redirects us to ourselves, * we have a routing loop, perhaps as a result of an interface * going down recently. */ if (!(flags & RTF_DONE) && rt && (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) error = EINVAL; else if (ifa_ifwithaddr_check(gateway)) error = EHOSTUNREACH; if (error) goto done; /* * Create a new entry if we just got back a wildcard entry * or the lookup failed. This is necessary for hosts * which use routing redirects generated by smart gateways * to dynamically build the routing tables. */ if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) goto create; /* * Don't listen to the redirect if it's * for a route to an interface. */ if (rt->rt_flags & RTF_GATEWAY) { if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { /* * Changing from route to net => route to host. * Create new route, rather than smashing route to net.
*/ create: rt0 = rt; rt = NULL; flags |= RTF_GATEWAY | RTF_DYNAMIC; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_ifa = ifa; info.rti_flags = flags; if (rt0 != NULL) RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */ error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); if (rt != NULL) { RT_LOCK(rt); if (rt0 != NULL) EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); flags = rt->rt_flags; } if (rt0 != NULL) RTFREE(rt0); stat = &V_rtstat.rts_dynamic; } else { struct rtentry *gwrt; /* * Smash the current notion of the gateway to * this destination. Should check about netmask!!! */ rt->rt_flags |= RTF_MODIFIED; flags |= RTF_MODIFIED; stat = &V_rtstat.rts_newgateway; /* * add the key and gateway (in one malloc'd chunk). */ RT_UNLOCK(rt); RADIX_NODE_HEAD_LOCK(rnh); RT_LOCK(rt); rt_setgate(rt, rt_key(rt), gateway); gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); RADIX_NODE_HEAD_UNLOCK(rnh); EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); RTFREE_LOCKED(gwrt); } } else error = EHOSTUNREACH; done: if (rt) RTFREE_LOCKED(rt); out: if (error) V_rtstat.rts_badredirect++; else if (stat != NULL) (*stat)++; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; info.rti_info[RTAX_AUTHOR] = src; rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); if (ifa != NULL) ifa_free(ifa); } int rtioctl(u_long req, caddr_t data) { return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); } /* * Routing table ioctl interface. */ int rtioctl_fib(u_long req, caddr_t data, u_int fibnum) { /* * If more ioctl commands are added here, make sure the proper * super-user checks are being performed because it is possible for * prison-root to make it this far if raw sockets have been enabled * in jails. */ #ifdef INET /* Multicast goop, grrr... */ return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; #else /* INET */ return ENXIO; #endif /* INET */ } /* * For both ifa_ifwithroute() routines, 'ifa' is returned referenced. */ struct ifaddr * ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) { return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB)); } struct ifaddr * ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, u_int fibnum) { register struct ifaddr *ifa; int not_found = 0; if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, * and the interface is a pt to pt link * we should search for the destination * as our clue to the interface. Otherwise * we can use the local address. */ ifa = NULL; if (flags & RTF_HOST) ifa = ifa_ifwithdstaddr(dst); if (ifa == NULL) ifa = ifa_ifwithaddr(gateway); } else { /* * If we are adding a route to a remote net * or host, the gateway may still be on the * other end of a pt to pt link. 
*/ ifa = ifa_ifwithdstaddr(gateway); } if (ifa == NULL) ifa = ifa_ifwithnet(gateway, 0); if (ifa == NULL) { struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum); if (rt == NULL) return (NULL); /* * dismiss a gateway that is reachable only * through the default router */ switch (gateway->sa_family) { case AF_INET: if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) not_found = 1; break; case AF_INET6: if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr)) not_found = 1; break; default: break; } if (!not_found && rt->rt_ifa != NULL) { ifa = rt->rt_ifa; ifa_ref(ifa); } RT_REMREF(rt); RT_UNLOCK(rt); if (not_found || ifa == NULL) return (NULL); } if (ifa->ifa_addr->sa_family != dst->sa_family) { struct ifaddr *oifa = ifa; ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); if (ifa == NULL) ifa = oifa; else ifa_free(oifa); } return (ifa); } /* * Do appropriate manipulations of a routing tree given * all the bits of info needed */ int rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct rtentry **ret_nrt) { return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, RT_DEFAULT_FIB)); } int rtrequest_fib(int req, struct sockaddr *dst, struct sockaddr *gateway, struct sockaddr *netmask, int flags, struct rtentry **ret_nrt, u_int fibnum) { struct rt_addrinfo info; if (dst->sa_len == 0) return(EINVAL); bzero((caddr_t)&info, sizeof(info)); info.rti_flags = flags; info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_NETMASK] = netmask; return rtrequest1_fib(req, &info, ret_nrt, fibnum); } /* * These (questionable) definitions of apparent local variables apply * to the next two functions. XXXXXX!!! */ #define dst info->rti_info[RTAX_DST] #define gateway info->rti_info[RTAX_GATEWAY] #define netmask info->rti_info[RTAX_NETMASK] #define ifaaddr info->rti_info[RTAX_IFA] #define ifpaddr info->rti_info[RTAX_IFP] #define flags info->rti_flags int rt_getifa(struct rt_addrinfo *info) { return (rt_getifa_fib(info, RT_DEFAULT_FIB)); } /* * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, * it will be referenced so the caller must free it. */ int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) { struct ifaddr *ifa; int error = 0; /* * ifp may be specified by sockaddr_dl * when protocol address is ambiguous. */ if (info->rti_ifp == NULL && ifpaddr != NULL && ifpaddr->sa_family == AF_LINK && (ifa = ifa_ifwithnet(ifpaddr, 0)) != NULL) { info->rti_ifp = ifa->ifa_ifp; ifa_free(ifa); } if (info->rti_ifa == NULL && ifaaddr != NULL) info->rti_ifa = ifa_ifwithaddr(ifaaddr); if (info->rti_ifa == NULL) { struct sockaddr *sa; sa = ifaaddr != NULL ? ifaaddr : (gateway != NULL ? gateway : dst); if (sa != NULL && info->rti_ifp != NULL) info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); else if (dst != NULL && gateway != NULL) info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway, fibnum); else if (sa != NULL) info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa, fibnum); } if ((ifa = info->rti_ifa) != NULL) { if (info->rti_ifp == NULL) info->rti_ifp = ifa->ifa_ifp; } else error = ENETUNREACH; return (error); } /* * Expunges references to a route that's about to be reclaimed. * The route must be locked. 
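 * The radix head lock must be held as well (see the
 * RADIX_NODE_HEAD_LOCK_ASSERT() below), so a call site looks roughly
 * like this sketch (illustrative only; real callers manage the locks
 * in context):
 *
 *	RADIX_NODE_HEAD_LOCK(rnh);
 *	RT_LOCK(rt);
 *	error = rtexpunge(rt);
 *	RT_UNLOCK(rt);
 *	RADIX_NODE_HEAD_UNLOCK(rnh);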
*/ int rtexpunge(struct rtentry *rt) { #if !defined(RADIX_MPATH) struct radix_node *rn; #else struct rt_addrinfo info; int fib; struct rtentry *rt0; #endif struct radix_node_head *rnh; struct ifaddr *ifa; int error = 0; /* * Find the correct routing tree to use for this Address Family */ rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); RT_LOCK_ASSERT(rt); if (rnh == NULL) return (EAFNOSUPPORT); RADIX_NODE_HEAD_LOCK_ASSERT(rnh); #ifdef RADIX_MPATH fib = rt->rt_fibnum; bzero(&info, sizeof(info)); info.rti_ifp = rt->rt_ifp; info.rti_flags = RTF_RNH_LOCKED; info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr; RT_UNLOCK(rt); error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib); if (error == 0 && rt0 != NULL) { rt = rt0; RT_LOCK(rt); } else if (error != 0) { RT_LOCK(rt); return (error); } #else /* * Remove the item from the tree; it should be there, * but when callers invoke us blindly it may not (sigh). */ rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh); if (rn == NULL) { error = ESRCH; goto bad; } KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0, ("unexpected flags 0x%x", rn->rn_flags)); KASSERT(rt == RNTORT(rn), ("lookup mismatch, rt %p rn %p", rt, rn)); #endif /* RADIX_MPATH */ rt->rt_flags &= ~RTF_UP; /* * Give the protocol a chance to keep things in sync. */ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) { struct rt_addrinfo info; bzero((caddr_t)&info, sizeof(info)); info.rti_flags = rt->rt_flags; info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); ifa->ifa_rtrequest(RTM_DELETE, rt, &info); } /* * one more rtentry floating around that is not * linked to the routing table. */ V_rttrash++; #if !defined(RADIX_MPATH) bad: #endif return (error); } #if 0 int p_sockaddr(char *buf, int buflen, struct sockaddr *s); int rt_print(char *buf, int buflen, struct rtentry *rt); int p_sockaddr(char *buf, int buflen, struct sockaddr *s) { void *paddr = NULL; switch (s->sa_family) { case AF_INET: paddr = &((struct sockaddr_in *)s)->sin_addr; break; case AF_INET6: paddr = &((struct sockaddr_in6 *)s)->sin6_addr; break; } if (paddr == NULL) return (0); if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL) return (0); return (strlen(buf)); } int rt_print(char *buf, int buflen, struct rtentry *rt) { struct sockaddr *addr, *mask; int i = 0; addr = rt_key(rt); mask = rt_mask(rt); i = p_sockaddr(buf, buflen, addr); if (!(rt->rt_flags & RTF_HOST)) { buf[i++] = '/'; i += p_sockaddr(buf + i, buflen - i, mask); } if (rt->rt_flags & RTF_GATEWAY) { buf[i++] = '>'; i += p_sockaddr(buf + i, buflen - i, rt->rt_gateway); } return (i); } #endif #ifdef RADIX_MPATH static int rn_mpath_update(int req, struct rt_addrinfo *info, struct radix_node_head *rnh, struct rtentry **ret_nrt) { /* * if we got multipath routes, we require users to specify * a matching RTAX_GATEWAY. */ struct rtentry *rt, *rto = NULL; register struct radix_node *rn; int error = 0; rn = rnh->rnh_lookup(dst, netmask, rnh); if (rn == NULL) return (ESRCH); rto = rt = RNTORT(rn); rt = rt_mpath_matchgate(rt, gateway); if (rt == NULL) return (ESRCH); /* * this is the first entry in the chain */ if (rto == rt) { rn = rn_mpath_next((struct radix_node *)rt); /* * there is another entry, now it's active */ if (rn) { rto = RNTORT(rn); RT_LOCK(rto); rto->rt_flags |= RTF_UP; RT_UNLOCK(rto); } else if (rt->rt_flags & RTF_GATEWAY) { /* * For gateway routes, we need to * make sure that we are deleting * the correct gateway.
* rt_mpath_matchgate() does not * check the case when there is only * one route in the chain. */ if (gateway && (rt->rt_gateway->sa_len != gateway->sa_len || memcmp(rt->rt_gateway, gateway, gateway->sa_len))) error = ESRCH; else { /* * remove from tree before returning it * to the caller */ rn = rnh->rnh_deladdr(dst, netmask, rnh); KASSERT(rt == RNTORT(rn), ("radix node disappeared")); goto gwdelete; } } /* * use the normal delete code to remove * the first entry */ if (req != RTM_DELETE) goto nondelete; error = ENOENT; goto done; } /* * if the entry is 2nd and on up */ if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt)) panic ("rtrequest1: rt_mpath_deldup"); gwdelete: RT_LOCK(rt); RT_ADDREF(rt); if (req == RTM_DELETE) { rt->rt_flags &= ~RTF_UP; /* * One more rtentry floating around that is not * linked to the routing table. rttrash will be decremented * when RTFREE(rt) is eventually called. */ V_rttrash++; } nondelete: if (req != RTM_DELETE) panic("unrecognized request %d", req); /* * If the caller wants it, then it can have it, * but it's up to it to free the rtentry as we won't be * doing it. */ if (ret_nrt) { *ret_nrt = rt; RT_UNLOCK(rt); } else RTFREE_LOCKED(rt); done: return (error); } #endif int rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) { int error = 0, needlock = 0; register struct rtentry *rt; #ifdef FLOWTABLE register struct rtentry *rt0; #endif register struct radix_node *rn; register struct radix_node_head *rnh; struct ifaddr *ifa; struct sockaddr *ndst; struct sockaddr_storage mdst; #define senderr(x) { error = x ; goto bad; } KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We support multiple FIBs. */ break; default: fibnum = RT_DEFAULT_FIB; break; } /* * Find the correct routing tree to use for this Address Family */ rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) return (EAFNOSUPPORT); needlock = ((flags & RTF_RNH_LOCKED) == 0); flags &= ~RTF_RNH_LOCKED; if (needlock) RADIX_NODE_HEAD_LOCK(rnh); else RADIX_NODE_HEAD_LOCK_ASSERT(rnh); /* * If we are adding a host route then we don't want to put * a netmask in the tree, nor do we want to clone it. */ if (flags & RTF_HOST) netmask = NULL; switch (req) { case RTM_DELETE: if (netmask) { rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); dst = (struct sockaddr *)&mdst; } #ifdef RADIX_MPATH if (rn_mpath_capable(rnh)) { error = rn_mpath_update(req, info, rnh, ret_nrt); /* * "bad" holds true for the success case * as well */ if (error != ENOENT) goto bad; error = 0; } #endif if ((flags & RTF_PINNED) == 0) { /* Check if target route can be deleted */ rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, rnh); if ((rt != NULL) && (rt->rt_flags & RTF_PINNED)) senderr(EADDRINUSE); } /* * Remove the item from the tree and return it. * Complain if it is not there and do no more processing. */ rn = rnh->rnh_deladdr(dst, netmask, rnh); if (rn == NULL) senderr(ESRCH); if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic ("rtrequest delete"); rt = RNTORT(rn); RT_LOCK(rt); RT_ADDREF(rt); rt->rt_flags &= ~RTF_UP; /* * give the protocol a chance to keep things in sync. */ if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) ifa->ifa_rtrequest(RTM_DELETE, rt, info); /* * One more rtentry floating around that is not * linked to the routing table. rttrash will be decremented * when RTFREE(rt) is eventually called. 
*/ V_rttrash++; /* * If the caller wants it, then it can have it, * but it's up to it to free the rtentry as we won't be * doing it. */ if (ret_nrt) { *ret_nrt = rt; RT_UNLOCK(rt); } else RTFREE_LOCKED(rt); break; case RTM_RESOLVE: /* * resolve was only used for route cloning * here for compat */ break; case RTM_ADD: if ((flags & RTF_GATEWAY) && !gateway) senderr(EINVAL); if (dst && gateway && (dst->sa_family != gateway->sa_family) && (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) senderr(EINVAL); if (info->rti_ifa == NULL) { error = rt_getifa_fib(info, fibnum); if (error) senderr(error); } else ifa_ref(info->rti_ifa); ifa = info->rti_ifa; rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); if (rt == NULL) { ifa_free(ifa); senderr(ENOBUFS); } RT_LOCK_INIT(rt); rt->rt_flags = RTF_UP | flags; rt->rt_fibnum = fibnum; /* * Add the gateway. Possibly re-malloc-ing the storage for it. */ RT_LOCK(rt); if ((error = rt_setgate(rt, dst, gateway)) != 0) { RT_LOCK_DESTROY(rt); ifa_free(ifa); uma_zfree(V_rtzone, rt); senderr(error); } /* * point to the (possibly newly malloc'd) dest address. */ ndst = (struct sockaddr *)rt_key(rt); /* * make sure it contains the value we want (masked if needed). */ if (netmask) { rt_maskedcopy(dst, ndst, netmask); } else bcopy(dst, ndst, dst->sa_len); /* * We use the ifa reference returned by rt_getifa_fib(). * This moved from below so that rnh->rnh_addaddr() can * examine the ifa and ifa->ifa_ifp if it so desires. */ rt->rt_ifa = ifa; rt->rt_ifp = ifa->ifa_ifp; rt->rt_rmx.rmx_weight = 1; #ifdef RADIX_MPATH /* do not permit exactly the same dst/mask/gw pair */ if (rn_mpath_capable(rnh) && rt_mpath_conflict(rnh, rt, netmask)) { ifa_free(rt->rt_ifa); Free(rt_key(rt)); RT_LOCK_DESTROY(rt); uma_zfree(V_rtzone, rt); senderr(EEXIST); } #endif #ifdef FLOWTABLE rt0 = NULL; /* "flow-table" only supports IPv6 and IPv4 at the moment. */ switch (dst->sa_family) { #ifdef INET6 case AF_INET6: #endif #ifdef INET case AF_INET: #endif #if defined(INET6) || defined(INET) rn = rnh->rnh_matchaddr(dst, rnh); if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { struct sockaddr *mask; u_char *m, *n; int len; /* * compare mask to see if the new route is * more specific than the existing one */ rt0 = RNTORT(rn); RT_LOCK(rt0); RT_ADDREF(rt0); RT_UNLOCK(rt0); /* * A host route is already present, so * leave the flow-table entries as is. */ if (rt0->rt_flags & RTF_HOST) { RTFREE(rt0); rt0 = NULL; } else if (!(flags & RTF_HOST) && netmask) { mask = rt_mask(rt0); len = mask->sa_len; m = (u_char *)mask; n = (u_char *)netmask; while (len-- > 0) { if (*n != *m) break; n++; m++; } if (len == 0 || (*n < *m)) { RTFREE(rt0); rt0 = NULL; } } } #endif/* INET6 || INET */ } #endif /* FLOWTABLE */ /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); /* * If it still failed to go into the tree, * then un-make it (this should be a function) */ if (rn == NULL) { ifa_free(rt->rt_ifa); Free(rt_key(rt)); RT_LOCK_DESTROY(rt); uma_zfree(V_rtzone, rt); #ifdef FLOWTABLE if (rt0 != NULL) RTFREE(rt0); #endif senderr(EEXIST); } #ifdef FLOWTABLE else if (rt0 != NULL) { switch (dst->sa_family) { #ifdef INET6 case AF_INET6: flowtable_route_flush(V_ip6_ft, rt0); break; #endif #ifdef INET case AF_INET: flowtable_route_flush(V_ip_ft, rt0); break; #endif } RTFREE(rt0); } #endif /* * If this protocol has something to add to this then * allow it to do that as well. 
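The FLOWTABLE block above decides whether the new prefix is more specific than the currently matched route by walking the two netmasks byte by byte. A standalone, simplified userland rendering of that comparison (assuming contiguous masks; intentionally not a byte-exact copy of the kernel loop):

#include <stdio.h>

/*
 * For contiguous netmasks, the first differing byte decides specificity:
 * the mask with the larger byte there has more bits set, i.e. a longer
 * prefix.  Returns 1 if n (the new mask) is strictly more specific than m.
 */
static int
new_mask_more_specific(const unsigned char *n, const unsigned char *m, int len)
{
        while (len-- > 0) {
                if (*n != *m)
                        return (*n > *m);
                n++;
                m++;
        }
        return (0);             /* identical masks */
}

int
main(void)
{
        unsigned char slash24[4] = { 255, 255, 255, 0 };
        unsigned char slash16[4] = { 255, 255, 0, 0 };

        printf("/24 beats /16: %d\n",
            new_mask_more_specific(slash24, slash16, 4));       /* 1 */
        printf("/16 beats /24: %d\n",
            new_mask_more_specific(slash16, slash24, 4));       /* 0 */
        return (0);
}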
*/ if (ifa->ifa_rtrequest) ifa->ifa_rtrequest(req, rt, info); /* * actually return a resultant rtentry and * give the caller a single reference. */ if (ret_nrt) { *ret_nrt = rt; RT_ADDREF(rt); } RT_UNLOCK(rt); break; default: error = EOPNOTSUPP; } bad: if (needlock) RADIX_NODE_HEAD_UNLOCK(rnh); return (error); #undef senderr } #undef dst #undef gateway #undef netmask #undef ifaaddr #undef ifpaddr #undef flags int rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) { /* XXX dst may be overwritten, can we move this to below */ int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); #ifdef INVARIANTS struct radix_node_head *rnh; rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family); #endif RT_LOCK_ASSERT(rt); RADIX_NODE_HEAD_LOCK_ASSERT(rnh); /* * Prepare to store the gateway in rt->rt_gateway. * Both dst and gateway are stored one after the other in the same * malloc'd chunk. If we have room, we can reuse the old buffer, * rt_gateway already points to the right place. * Otherwise, malloc a new block and update the 'dst' address. */ if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) { caddr_t new; R_Malloc(new, caddr_t, dlen + glen); if (new == NULL) return ENOBUFS; /* * XXX note, we copy from *dst and not *rt_key(rt) because * rt_setgate() can be called to initialize a newly * allocated route entry, in which case rt_key(rt) == NULL * (and also rt->rt_gateway == NULL). * Free()/free() handle a NULL argument just fine. */ bcopy(dst, new, dlen); Free(rt_key(rt)); /* free old block, if any */ rt_key(rt) = (struct sockaddr *)new; rt->rt_gateway = (struct sockaddr *)(new + dlen); } /* * Copy the new gateway value into the memory chunk. */ bcopy(gate, rt->rt_gateway, glen); return (0); } void rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) { register u_char *cp1 = (u_char *)src; register u_char *cp2 = (u_char *)dst; register u_char *cp3 = (u_char *)netmask; u_char *cplim = cp2 + *cp3; u_char *cplim2 = cp2 + *cp1; *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ cp3 += 2; if (cplim > cplim2) cplim = cplim2; while (cp2 < cplim) *cp2++ = *cp1++ & *cp3++; if (cp2 < cplim2) bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); } /* * Set up a routing table entry, normally * for an interface. */ #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ static inline int rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; struct rt_addrinfo info; int error = 0; int startfib, endfib; char tempbuf[_SOCKADDR_TMPSIZE]; int didwork = 0; int a_failure = 0; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; struct radix_node_head *rnh; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; netmask = NULL; } else { dst = ifa->ifa_addr; netmask = ifa->ifa_netmask; } if (dst->sa_len == 0) return(EINVAL); switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We support multiple FIBs. */ break; default: fibnum = RT_DEFAULT_FIB; break; } if (fibnum == RT_ALL_FIBS) { if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) { startfib = endfib = curthread->td_proc->p_fibnum; } else { startfib = 0; endfib = rt_numfibs - 1; } } else { KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); startfib = fibnum; endfib = fibnum; } /* * If it's a delete, check that if it exists, * it's on the correct interface or we might scrub * a route to another ifa which would * be confusing at best and possibly worse. 
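rt_maskedcopy() above (used by the RTM_DELETE path that follows, and by rtrequest1_fib() earlier) copies sa_len and sa_family verbatim, ANDs the body with the netmask, and zeroes any remaining tail. A self-contained userland rendering on raw byte buffers, with an illustrative sockaddr_in-shaped layout:

#include <stdio.h>
#include <string.h>

static void
masked_copy(const unsigned char *src, unsigned char *dst,
    const unsigned char *netmask)
{
        const unsigned char *cp1 = src;
        unsigned char *cp2 = dst;
        const unsigned char *cp3 = netmask;
        unsigned char *cplim = dst + *cp3;      /* mask's sa_len bounds the AND */
        unsigned char *cplim2 = dst + *cp1;     /* source's sa_len bounds the copy */

        *cp2++ = *cp1++;
        *cp2++ = *cp1++;        /* sa_len and sa_family copied verbatim */
        cp3 += 2;
        if (cplim > cplim2)
                cplim = cplim2;
        while (cp2 < cplim)
                *cp2++ = *cp1++ & *cp3++;
        if (cp2 < cplim2)
                memset(cp2, 0, cplim2 - cp2);   /* zero the unmasked tail */
}

int
main(void)
{
        /* sockaddr_in-shaped: sa_len, sa_family, port (2 bytes), addr (4). */
        unsigned char host[8] = { 8, 2, 0, 0, 192, 0, 2, 130 };
        unsigned char mask[8] = { 8, 0, 0, 0, 255, 255, 255, 0 };
        unsigned char net[8];

        masked_copy(host, net, mask);
        printf("masked address: %u.%u.%u.%u\n",
            net[4], net[5], net[6], net[7]);    /* 192.0.2.0 */
        return (0);
}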
*/ if (cmd == RTM_DELETE) { /* * It's a delete, so it should already exist.. * If it's a net, mask off the host bits * (Assuming we have a mask) * XXX this is kinda inet specific.. */ if (netmask != NULL) { rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); dst = (struct sockaddr *)tempbuf; } } /* * Now go through all the requested tables (fibs) and do the * requested action. Realistically, this will either be fib 0 * for protocols that don't do multiple tables or all the * tables for those that do. */ for ( fibnum = startfib; fibnum <= endfib; fibnum++) { if (cmd == RTM_DELETE) { struct radix_node *rn; /* * Look up an rtentry that is in the routing tree and * contains the correct info. */ rnh = rt_tables_get_rnh(fibnum, dst->sa_family); if (rnh == NULL) /* this table doesn't exist but others might */ continue; RADIX_NODE_HEAD_RLOCK(rnh); rn = rnh->rnh_lookup(dst, netmask, rnh); #ifdef RADIX_MPATH if (rn_mpath_capable(rnh)) { if (rn == NULL) error = ESRCH; else { rt = RNTORT(rn); /* * for interface route the * rt->rt_gateway is sockaddr_intf * for cloning ARP entries, so * rt_mpath_matchgate must use the * interface address */ rt = rt_mpath_matchgate(rt, ifa->ifa_addr); if (rt == NULL) error = ESRCH; } } #endif error = (rn == NULL || (rn->rn_flags & RNF_ROOT) || RNTORT(rn)->rt_ifa != ifa); RADIX_NODE_HEAD_RUNLOCK(rnh); if (error) { /* this is only an error if bad on ALL tables */ continue; } } /* * Do the actual request */ bzero((caddr_t)&info, sizeof(info)); info.rti_ifa = ifa; info.rti_flags = flags | (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; info.rti_info[RTAX_DST] = dst; /* * doing this for compatibility reasons */ if (cmd == RTM_ADD) info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; else info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = netmask; error = rtrequest1_fib(cmd, &info, &rt, fibnum); if ((error == EEXIST) && (cmd == RTM_ADD)) { /* * Interface route addition failed. * Atomically delete current prefix generating * RTM_DELETE message, and retry adding * interface prefix. */ rnh = rt_tables_get_rnh(fibnum, dst->sa_family); RADIX_NODE_HEAD_LOCK(rnh); /* Delete old prefix */ info.rti_ifa = NULL; info.rti_flags = RTF_RNH_LOCKED; error = rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); if (error == 0) { info.rti_ifa = ifa; info.rti_flags = flags | RTF_RNH_LOCKED | (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; error = rtrequest1_fib(cmd, &info, &rt, fibnum); } RADIX_NODE_HEAD_UNLOCK(rnh); } if (error == 0 && rt != NULL) { /* * notify any listening routing agents of the change */ RT_LOCK(rt); #ifdef RADIX_MPATH /* * in case address alias finds the first address * e.g. ifconfig bge0 192.0.2.246/24 * e.g. ifconfig bge0 192.0.2.247/24 * the address set in the route is 192.0.2.246 * so we need to replace it with 192.0.2.247 */ if (memcmp(rt->rt_ifa->ifa_addr, ifa->ifa_addr, ifa->ifa_addr->sa_len)) { ifa_free(rt->rt_ifa); ifa_ref(ifa); rt->rt_ifp = ifa->ifa_ifp; rt->rt_ifa = ifa; } #endif /* * doing this for compatibility reasons */ if (cmd == RTM_ADD) { ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = rt->rt_ifp->if_type; ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = rt->rt_ifp->if_index; } RT_ADDREF(rt); RT_UNLOCK(rt); rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum); RT_LOCK(rt); RT_REMREF(rt); if (cmd == RTM_DELETE) { /* * If we are deleting, and we found an entry, * then it's been removed from the tree.. * now throw it away. */ RTFREE_LOCKED(rt); } else { if (cmd == RTM_ADD) { /* * We just wanted to add it.. 
* we don't actually need a reference. */ RT_REMREF(rt); } RT_UNLOCK(rt); } didwork = 1; } if (error) a_failure = error; } if (cmd == RTM_DELETE) { if (didwork) { error = 0; } else { /* we only give an error if it wasn't in any table */ error = ((flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH); } } else { if (a_failure) { /* return an error if any of them failed */ error = a_failure; } } return (error); } #ifndef BURN_BRIDGES /* special one for inet internal use. may not use. */ int rtinit_fib(struct ifaddr *ifa, int cmd, int flags) { return (rtinit1(ifa, cmd, flags, RT_ALL_FIBS)); } #endif /* * Set up a routing table entry, normally * for an interface. */ int rtinit(struct ifaddr *ifa, int cmd, int flags) { struct sockaddr *dst; int fib = RT_DEFAULT_FIB; if (flags & RTF_HOST) { dst = ifa->ifa_dstaddr; } else { dst = ifa->ifa_addr; } switch (dst->sa_family) { case AF_INET6: case AF_INET: /* We do support multiple FIBs. */ fib = RT_ALL_FIBS; break; } return (rtinit1(ifa, cmd, flags, fib)); } + +/* + * Announce interface address arrival/withdrawal + * Returns 0 on success. + */ +int +rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) +{ + + KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, + ("unexpected cmd %u", cmd)); + + if (fibnum != RT_ALL_FIBS) { + KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: " + "fibnum out of range 0 <= %d < %d", __func__, + fibnum, rt_numfibs)); + } + + return (rtsock_addrmsg(cmd, ifa, fibnum)); +} + + +/* + * Announce route addition/removal + * Users of this function MUST validate input data BEFORE calling. + * However we have to be able to handle invalid data: + * if some userland app sends us an "invalid" route message (invalid mask, + * no dst, wrong address families, etc...) we need to pass it back + * to the app (and any other rtsock consumers) with rtm_errno field set to + * non-zero value. + * Returns 0 on success. + */ +int +rt_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt, + int fibnum) +{ + + KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, + ("unexpected cmd %u", cmd)); + + if (fibnum != RT_ALL_FIBS) { + KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: " + "fibnum out of range 0 <= %d < %d", __func__, + fibnum, rt_numfibs)); + } + + KASSERT(rt_key(rt) != NULL, ("%s: rt_key must be supplied", __func__)); + + return (rtsock_routemsg(cmd, ifp, error, rt, fibnum)); +} + +void +rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) +{ + + rt_newaddrmsg_fib(cmd, ifa, error, rt, RT_ALL_FIBS); +} + +/* + * This is called to generate messages from the routing socket + * indicating a network interface has had addresses associated with it. + */ +void +rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt, + int fibnum) +{ + + KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, + ("unexpected cmd %u", cmd)); + if (fibnum != RT_ALL_FIBS) { + KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: " + "fibnum out of range 0 <= %d < %d", __func__, + fibnum, rt_numfibs)); + } + +#if defined(INET) || defined(INET6) +#ifdef SCTP + /* + * notify the SCTP stack + * this will only get called when an address is added/deleted + * XXX pass the ifaddr struct instead of ifa->ifa_addr...
+ */ + sctp_addr_change(ifa, cmd); +#endif /* SCTP */ +#endif + if (cmd == RTM_ADD) { + rt_addrmsg(cmd, ifa, fibnum); + if (rt != NULL) + rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum); + } else { + if (rt != NULL) + rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum); + rt_addrmsg(cmd, ifa, fibnum); + } +} + diff --git a/sys/net/route.h b/sys/net/route.h index 8ff8a2180736..1a033481e3f3 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -1,433 +1,438 @@ /*- * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)route.h 8.4 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _NET_ROUTE_H_ #define _NET_ROUTE_H_ /* * Kernel resident routing tables. * * The routing tables are initialized when interface addresses * are set by making entries for all directly connected interfaces. */ /* * A route consists of a destination address, a reference * to a routing entry, and a reference to an llentry. * These are often held by protocols in their control * blocks, e.g. inpcb. */ struct route { struct rtentry *ro_rt; struct llentry *ro_lle; struct in_ifaddr *ro_ia; int ro_flags; struct sockaddr ro_dst; }; #define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */ #define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */ /* * These numbers are used by reliable protocols for determining * retransmission behavior and are included in the routing structure. */ struct rt_metrics_lite { u_long rmx_mtu; /* MTU for this path */ u_long rmx_expire; /* lifetime for route, e.g. redirect */ u_long rmx_pksent; /* packets sent using this route */ u_long rmx_weight; /* absolute weight */ }; struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ u_long rmx_mtu; /* MTU for this path */ u_long rmx_hopcount; /* max hops expected */ u_long rmx_expire; /* lifetime for route, e.g. 
redirect */ u_long rmx_recvpipe; /* inbound delay-bandwidth product */ u_long rmx_sendpipe; /* outbound delay-bandwidth product */ u_long rmx_ssthresh; /* outbound gateway buffer limit */ u_long rmx_rtt; /* estimated round trip time */ u_long rmx_rttvar; /* estimated rtt variance */ u_long rmx_pksent; /* packets sent using this route */ u_long rmx_weight; /* route weight */ u_long rmx_filler[3]; /* will be used for T/TCP later */ }; /* * rmx_rtt and rmx_rttvar are stored as microseconds; * RTTTOPRHZ(rtt) converts to a value suitable for use * by a protocol slowtimo counter. */ #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ #define RTTTOPRHZ(r) ((r) / (RTM_RTTUNIT / PR_SLOWHZ)) #define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */ #define RT_ALL_FIBS -1 /* Announce event for every fib */ extern u_int rt_numfibs; /* number of usable routing tables */ /* * XXX kernel function pointer `rt_output' is visible to applications. */ struct mbuf; /* * We distinguish between routes to hosts and routes to networks, * preferring the former if available. For each route we infer * the interface to use from the gateway address supplied when * the route was entered. Routes that forward packets through * gateways are marked so that the output routines know to address the * gateway rather than the ultimate destination. */ #ifndef RNF_NORMAL #include #ifdef RADIX_MPATH #include #endif #endif struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* * XXX struct rtentry must begin with a struct radix_node (or two!) * because the code does some casts of a 'struct radix_node *' * to a 'struct rtentry *' */ #define rt_key(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_key))) #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) struct sockaddr *rt_gateway; /* value */ int rt_flags; /* up/down?, host/net */ int rt_refcnt; /* # held references */ struct ifnet *rt_ifp; /* the answer: interface to use */ struct ifaddr *rt_ifa; /* the answer: interface address to use */ struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */ u_int rt_fibnum; /* which FIB */ #ifdef _KERNEL /* XXX ugly, user apps use this definition but don't have a mtx def */ struct mtx rt_mtx; /* mutex for routing entry */ #endif }; /* * Following structure necessary for 4.3 compatibility; * We should eventually move it to a compat file. 
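A hypothetical kernel-side sketch (illustrative only) of the usual struct route lookup pattern with the types declared earlier in this header; route_output() uses the same shape later in this diff for its gateway check. rtalloc_ign_fib() returns ro_rt unlocked (see the locking notes further down), so RTFREE() is the matching release:

static u_long
example_path_mtu(struct sockaddr *dst, u_int fibnum)
{
        struct route ro;
        u_long mtu = 0;

        bzero(&ro, sizeof(ro));
        ro.ro_dst = *dst;       /* assumes dst fits in a plain sockaddr */
        rtalloc_ign_fib(&ro, 0, fibnum);
        if (ro.ro_rt != NULL) {
                mtu = ro.ro_rt->rt_rmx.rmx_mtu; /* metrics defined above */
                RTFREE(ro.ro_rt);               /* drop the lookup's reference */
        }
        return (mtu);
}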
*/ struct ortentry { u_long rt_hash; /* to speed lookups */ struct sockaddr rt_dst; /* key */ struct sockaddr rt_gateway; /* value */ short rt_flags; /* up/down?, host/net */ short rt_refcnt; /* # held references */ u_long rt_use; /* raw # packets forwarded */ struct ifnet *rt_ifp; /* the answer: interface to use */ }; #define rt_use rt_rmx.rmx_pksent #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ #define RTF_HOST 0x4 /* host entry (net otherwise) */ #define RTF_REJECT 0x8 /* host or net unreachable */ #define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */ #define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */ #define RTF_DONE 0x40 /* message confirmed */ /* 0x80 unused, was RTF_DELCLONE */ /* 0x100 unused, was RTF_CLONING */ #define RTF_XRESOLVE 0x200 /* external daemon resolves name */ #define RTF_LLINFO 0x400 /* DEPRECATED - exists ONLY for backward compatibility */ #define RTF_LLDATA 0x400 /* used by apps to add/del L2 entries */ #define RTF_STATIC 0x800 /* manually added */ #define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ #define RTF_PROTO2 0x4000 /* protocol specific routing flag */ #define RTF_PROTO1 0x8000 /* protocol specific routing flag */ /* XXX: temporary to stay API/ABI compatible with userland */ #ifndef _KERNEL #define RTF_PRCLONING 0x10000 /* unused, for compatibility */ #endif /* 0x20000 unused, was RTF_WASCLONED */ #define RTF_PROTO3 0x40000 /* protocol specific routing flag */ /* 0x80000 unused */ #define RTF_PINNED 0x100000 /* route is immutable */ #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ /* 0x8000000 and up unassigned */ #define RTF_STICKY 0x10000000 /* always route dst->src */ #define RTF_RNH_LOCKED 0x40000000 /* radix node head is locked */ #define RTF_GWFLAG_COMPAT 0x80000000 /* a compatibility bit for interacting with existing routing apps */ /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */ #define RTF_FMASK \ (RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \ RTF_REJECT | RTF_STATIC | RTF_STICKY) /* * Routing statistics. */ struct rtstat { short rts_badredirect; /* bogus redirect calls */ short rts_dynamic; /* routes created by redirects */ short rts_newgateway; /* routes modified by redirects */ short rts_unreach; /* lookups which failed */ short rts_wildcard; /* lookups satisfied by a wildcard */ }; /* * Structures for routing messages. */ struct rt_msghdr { u_short rtm_msglen; /* to skip over non-understood messages */ u_char rtm_version; /* future binary compatibility */ u_char rtm_type; /* message type */ u_short rtm_index; /* index for associated ifp */ int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ int rtm_addrs; /* bitmask identifying sockaddrs in msg */ pid_t rtm_pid; /* identify sender */ int rtm_seq; /* for sender to identify action */ int rtm_errno; /* why failed */ int rtm_fmask; /* bitmask used in RTM_CHANGE message */ u_long rtm_inits; /* which metrics we are initializing */ struct rt_metrics rtm_rmx; /* metrics themselves */ }; #define RTM_VERSION 5 /* Up the ante and ignore older versions */ /* * Message types. 
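Before the message-type constants, a quick standalone illustration of the RTF_ bits defined above: userland tools such as route(8) render a route's flags word as a string of letters. A minimal decoder (values repeated here so the snippet compiles anywhere; on FreeBSD <net/route.h> provides the real definitions):

#include <stdio.h>

#define EX_RTF_UP       0x1
#define EX_RTF_GATEWAY  0x2
#define EX_RTF_HOST     0x4
#define EX_RTF_STATIC   0x800

static void
print_rtflags(int flags)
{
        /* e.g. UP|GATEWAY|STATIC prints "UGS" */
        printf("%s%s%s%s\n",
            (flags & EX_RTF_UP) ? "U" : "",
            (flags & EX_RTF_GATEWAY) ? "G" : "",
            (flags & EX_RTF_HOST) ? "H" : "",
            (flags & EX_RTF_STATIC) ? "S" : "");
}

int
main(void)
{
        print_rtflags(EX_RTF_UP | EX_RTF_GATEWAY | EX_RTF_STATIC);
        return (0);
}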
*/ #define RTM_ADD 0x1 /* Add Route */ #define RTM_DELETE 0x2 /* Delete Route */ #define RTM_CHANGE 0x3 /* Change Metrics or flags */ #define RTM_GET 0x4 /* Report Metrics */ #define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */ #define RTM_REDIRECT 0x6 /* Told to use different route */ #define RTM_MISS 0x7 /* Lookup failed on this address */ #define RTM_LOCK 0x8 /* fix specified metrics */ #define RTM_OLDADD 0x9 /* caused by SIOCADDRT */ #define RTM_OLDDEL 0xa /* caused by SIOCDELRT */ #define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */ #define RTM_NEWADDR 0xc /* address being added to iface */ #define RTM_DELADDR 0xd /* address being removed from iface */ #define RTM_IFINFO 0xe /* iface going up/down etc. */ #define RTM_NEWMADDR 0xf /* mcast group membership being added to if */ #define RTM_DELMADDR 0x10 /* mcast group membership being deleted */ #define RTM_IFANNOUNCE 0x11 /* iface arrival/departure */ #define RTM_IEEE80211 0x12 /* IEEE80211 wireless event */ /* * Bitmask values for rtm_inits and rmx_locks. */ #define RTV_MTU 0x1 /* init or lock _mtu */ #define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ #define RTV_EXPIRE 0x4 /* init or lock _expire */ #define RTV_RPIPE 0x8 /* init or lock _recvpipe */ #define RTV_SPIPE 0x10 /* init or lock _sendpipe */ #define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ #define RTV_RTT 0x40 /* init or lock _rtt */ #define RTV_RTTVAR 0x80 /* init or lock _rttvar */ #define RTV_WEIGHT 0x100 /* init or lock _weight */ /* * Bitmask values for rtm_addrs. */ #define RTA_DST 0x1 /* destination sockaddr present */ #define RTA_GATEWAY 0x2 /* gateway sockaddr present */ #define RTA_NETMASK 0x4 /* netmask sockaddr present */ #define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ #define RTA_IFP 0x10 /* interface name sockaddr present */ #define RTA_IFA 0x20 /* interface addr sockaddr present */ #define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ #define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */ /* * Index offsets for sockaddr array for alternate internal encoding. */ #define RTAX_DST 0 /* destination sockaddr present */ #define RTAX_GATEWAY 1 /* gateway sockaddr present */ #define RTAX_NETMASK 2 /* netmask sockaddr present */ #define RTAX_GENMASK 3 /* cloning mask sockaddr present */ #define RTAX_IFP 4 /* interface name sockaddr present */ #define RTAX_IFA 5 /* interface addr sockaddr present */ #define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ #define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ #define RTAX_MAX 8 /* size of array to allocate */ struct rt_addrinfo { int rti_addrs; struct sockaddr *rti_info[RTAX_MAX]; int rti_flags; struct ifaddr *rti_ifa; struct ifnet *rti_ifp; }; /* * This macro returns the size of a struct sockaddr when passed * through a routing socket. Basically we round up sa_len to * a multiple of sizeof(long), with a minimum of sizeof(long). * The check for a NULL pointer is just a convenience, probably never used. * The case sa_len == 0 should only apply to empty structures. */ #define SA_SIZE(sa) \ ( (!(sa) || ((struct sockaddr *)(sa))->sa_len == 0) ? 
\ sizeof(long) : \ 1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) ) #ifdef _KERNEL #define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \ || (ifp)->if_link_state == LINK_STATE_UP) #define RT_LOCK_INIT(_rt) \ mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK) #define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx) #define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx) #define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx) #define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED) #define RT_ADDREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ KASSERT((_rt)->rt_refcnt >= 0, \ ("negative refcnt %d", (_rt)->rt_refcnt)); \ (_rt)->rt_refcnt++; \ } while (0) #define RT_REMREF(_rt) do { \ RT_LOCK_ASSERT(_rt); \ KASSERT((_rt)->rt_refcnt > 0, \ ("bogus refcnt %d", (_rt)->rt_refcnt)); \ (_rt)->rt_refcnt--; \ } while (0) #define RTFREE_LOCKED(_rt) do { \ if ((_rt)->rt_refcnt <= 1) \ rtfree(_rt); \ else { \ RT_REMREF(_rt); \ RT_UNLOCK(_rt); \ } \ /* guard against invalid refs */ \ _rt = 0; \ } while (0) #define RTFREE(_rt) do { \ RT_LOCK(_rt); \ RTFREE_LOCKED(_rt); \ } while (0) #define RO_RTFREE(_ro) do { \ if ((_ro)->ro_rt) { \ if ((_ro)->ro_flags & RT_NORTREF) { \ (_ro)->ro_flags &= ~RT_NORTREF; \ (_ro)->ro_rt = NULL; \ } else { \ RT_LOCK((_ro)->ro_rt); \ RTFREE_LOCKED((_ro)->ro_rt); \ } \ } \ } while (0) struct radix_node_head *rt_tables_get_rnh(int, int); struct ifmultiaddr; void rt_ieee80211msg(struct ifnet *, int, void *, size_t); void rt_ifannouncemsg(struct ifnet *, int); void rt_ifmsg(struct ifnet *); void rt_missmsg(int, struct rt_addrinfo *, int, int); void rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int); void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *); void rt_newaddrmsg_fib(int, struct ifaddr *, int, struct rtentry *, int); +int rt_addrmsg(int, struct ifaddr *, int); +int rt_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); void rt_newmaddrmsg(int, struct ifmultiaddr *); int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *); void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); +int rtsock_addrmsg(int, struct ifaddr *, int); +int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); + /* * Note the following locking behavior: * * rtalloc_ign() and rtalloc() return ro->ro_rt unlocked * * rtalloc1() returns a locked rtentry * * rtfree() and RTFREE_LOCKED() require a locked rtentry * * RTFREE() uses an unlocked entry. */ int rtexpunge(struct rtentry *); void rtfree(struct rtentry *); int rt_check(struct rtentry **, struct rtentry **, struct sockaddr *); /* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */ /* These are used by old code not yet converted to use multiple FIBs */ int rt_getifa(struct rt_addrinfo *); void rtalloc_ign(struct route *ro, u_long ignflags); void rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */ struct rtentry *rtalloc1(struct sockaddr *, int, u_long); int rtinit(struct ifaddr *, int, int); int rtioctl(u_long, caddr_t); void rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *); int rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **); #ifndef BURN_BRIDGES /* defaults to "all" FIBs */ int rtinit_fib(struct ifaddr *, int, int); #endif /* XXX MRT NEW VERSIONS THAT USE FIBs * For now the protocol independent versions are the same as the AF_INET ones * but this will change..
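SA_SIZE() above rounds sa_len up to a multiple of sizeof(long), with a minimum of one long; this is how sockaddrs are packed back-to-back in routing-socket messages. A standalone check of the arithmetic, re-expressed on a plain length so it runs anywhere:

#include <stdio.h>

static size_t
sa_size_from_len(size_t sa_len)
{
        if (sa_len == 0)
                return (sizeof(long));  /* empty sockaddr still takes a slot */
        return (1 + ((sa_len - 1) | (sizeof(long) - 1)));
}

int
main(void)
{
        size_t len;

        /* With 8-byte longs: 0->8, 1..8->8, 9..16->16, 17->24, ... */
        for (len = 0; len <= 17; len++)
                printf("sa_len %2zu -> SA_SIZE %2zu\n", len,
                    sa_size_from_len(len));
        return (0);
}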
*/ int rt_getifa_fib(struct rt_addrinfo *, u_int fibnum); void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum); void rtalloc_fib(struct route *ro, u_int fibnum); struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int); int rtioctl_fib(u_long, caddr_t, u_int); void rtredirect_fib(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, u_int); int rtrequest_fib(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int); #include typedef void (*rtevent_arp_update_fn)(void *, struct rtentry *, uint8_t *, struct sockaddr *); typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *); /* route_arp_update_event is no longer generated; see arp_update_event */ EVENTHANDLER_DECLARE(route_arp_update_event, rtevent_arp_update_fn); EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn); #endif #endif diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 98cccb3f9394..2e7f6f43f40b 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -1,2029 +1,2026 @@ /*- * Copyright (c) 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)rtsock.c 8.7 (Berkeley) 10/12/95 * $FreeBSD$ */ #include "opt_compat.h" -#include "opt_sctp.h" #include "opt_mpath.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif -#if defined(INET) || defined(INET6) -#ifdef SCTP -extern void sctp_addr_change(struct ifaddr *ifa, int cmd); -#endif /* SCTP */ -#endif - #ifdef COMPAT_FREEBSD32 #include #include struct if_data32 { uint8_t ifi_type; uint8_t ifi_physical; uint8_t ifi_addrlen; uint8_t ifi_hdrlen; uint8_t ifi_link_state; uint8_t ifi_vhid; uint8_t ifi_baudrate_pf; uint8_t ifi_datalen; uint32_t ifi_mtu; uint32_t ifi_metric; uint32_t ifi_baudrate; uint32_t ifi_ipackets; uint32_t ifi_ierrors; uint32_t ifi_opackets; uint32_t ifi_oerrors; uint32_t ifi_collisions; uint32_t ifi_ibytes; uint32_t ifi_obytes; uint32_t ifi_imcasts; uint32_t ifi_omcasts; uint32_t ifi_iqdrops; uint32_t ifi_noproto; uint32_t ifi_hwassist; int32_t ifi_epoch; struct timeval32 ifi_lastchange; }; struct if_msghdr32 { uint16_t ifm_msglen; uint8_t ifm_version; uint8_t ifm_type; int32_t ifm_addrs; int32_t ifm_flags; uint16_t ifm_index; struct if_data32 ifm_data; }; struct if_msghdrl32 { uint16_t ifm_msglen; uint8_t ifm_version; uint8_t ifm_type; int32_t ifm_addrs; int32_t ifm_flags; uint16_t ifm_index; uint16_t _ifm_spare1; uint16_t ifm_len; uint16_t ifm_data_off; struct if_data32 ifm_data; }; struct ifa_msghdrl32 { uint16_t ifam_msglen; uint8_t ifam_version; uint8_t ifam_type; int32_t ifam_addrs; int32_t ifam_flags; uint16_t ifam_index; uint16_t _ifam_spare1; uint16_t ifam_len; uint16_t ifam_data_off; int32_t ifam_metric; struct if_data32 ifam_data; }; #endif /* COMPAT_FREEBSD32 */ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); /* NB: these are not modified */ static struct sockaddr route_src = { 2, PF_ROUTE, }; static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, }; /* These are external hooks for CARP. */ int (*carp_get_vhid_p)(struct ifaddr *); /* * Used by rtsock/raw_input callback code to decide whether to filter the update * notification to a socket bound to a particular FIB. 
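As a companion to the FIB filtering described above, here is a minimal userland consumer of the socket this file implements, in the spirit of route(8)'s monitor mode; runnable on FreeBSD, error handling trimmed for brevity. Each read() returns one complete message beginning with a struct rt_msghdr:

#include <sys/types.h>
#include <sys/socket.h>
#include <net/route.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        union {
                struct rt_msghdr hdr;
                char buf[2048];
        } u;
        ssize_t n;
        int s;

        s = socket(PF_ROUTE, SOCK_RAW, 0);      /* 0: all address families */
        if (s == -1) {
                perror("socket");
                return (1);
        }
        while ((n = read(s, u.buf, sizeof(u.buf))) > 0) {
                if (u.hdr.rtm_version != RTM_VERSION)
                        continue;       /* skip messages we don't understand */
                printf("type %d, len %u, pid %ld, errno %d\n",
                    u.hdr.rtm_type, (unsigned)u.hdr.rtm_msglen,
                    (long)u.hdr.rtm_pid, u.hdr.rtm_errno);
        }
        close(s);
        return (0);
}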
*/ #define RTS_FILTER_FIB M_PROTO8 static struct { int ip_count; /* attached w/ AF_INET */ int ip6_count; /* attached w/ AF_INET6 */ int ipx_count; /* attached w/ AF_IPX */ int any_count; /* total attached */ } route_cb; struct mtx rtsock_mtx; MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); #define RTSOCK_LOCK() mtx_lock(&rtsock_mtx) #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx) #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED) static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, ""); struct walkarg { int w_tmemsize; int w_op, w_arg; caddr_t w_tmem; struct sysctl_req *w_req; }; static void rts_input(struct mbuf *m); static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo); static int rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w); static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo); static int sysctl_dumpentry(struct radix_node *rn, void *vw); static int sysctl_iflist(int af, struct walkarg *w); static int sysctl_ifmalist(int af, struct walkarg *w); static int route_output(struct mbuf *m, struct socket *so); static void rt_setmetrics(u_long which, const struct rt_metrics *in, struct rt_metrics_lite *out); static void rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out); static void rt_dispatch(struct mbuf *, sa_family_t); static struct netisr_handler rtsock_nh = { .nh_name = "rtsock", .nh_handler = rts_input, .nh_proto = NETISR_ROUTE, .nh_policy = NETISR_POLICY_SOURCE, }; static int sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&rtsock_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&rtsock_nh, qlimit)); } SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_route_netisr_maxqlen, "I", "maximum routing socket dispatch queue length"); static void rts_init(void) { int tmp; if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp)) rtsock_nh.nh_qlimit = tmp; netisr_register(&rtsock_nh); } SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0); static int raw_input_rts_cb(struct mbuf *m, struct sockproto *proto, struct sockaddr *src, struct rawcb *rp) { int fibnum; KASSERT(m != NULL, ("%s: m is NULL", __func__)); KASSERT(proto != NULL, ("%s: proto is NULL", __func__)); KASSERT(rp != NULL, ("%s: rp is NULL", __func__)); /* No filtering requested. */ if ((m->m_flags & RTS_FILTER_FIB) == 0) return (0); /* Check if it is a rts and the fib matches the one of the socket. */ fibnum = M_GETFIB(m); if (proto->sp_family != PF_ROUTE || rp->rcb_socket == NULL || rp->rcb_socket->so_fibnum == fibnum) return (0); /* Filtering requested and no match, the socket shall be skipped. */ return (1); } static void rts_input(struct mbuf *m) { struct sockproto route_proto; unsigned short *family; struct m_tag *tag; route_proto.sp_family = PF_ROUTE; tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL); if (tag != NULL) { family = (unsigned short *)(tag + 1); route_proto.sp_protocol = *family; m_tag_delete(m, tag); } else route_proto.sp_protocol = 0; raw_input_ext(m, &route_proto, &route_src, raw_input_rts_cb); } /* * It really doesn't make any sense at all for this code to share much * with raw_usrreq.c, since its functionality is so restricted. 
XXX */ static void rts_abort(struct socket *so) { raw_usrreqs.pru_abort(so); } static void rts_close(struct socket *so) { raw_usrreqs.pru_close(so); } /* pru_accept is EOPNOTSUPP */ static int rts_attach(struct socket *so, int proto, struct thread *td) { struct rawcb *rp; int error; KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL")); /* XXX */ rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO); if (rp == NULL) return ENOBUFS; so->so_pcb = (caddr_t)rp; so->so_fibnum = td->td_proc->p_fibnum; error = raw_attach(so, proto); rp = sotorawcb(so); if (error) { so->so_pcb = NULL; free(rp, M_PCB); return error; } RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: route_cb.ip_count++; break; case AF_INET6: route_cb.ip6_count++; break; case AF_IPX: route_cb.ipx_count++; break; } route_cb.any_count++; RTSOCK_UNLOCK(); soisconnected(so); so->so_options |= SO_USELOOPBACK; return 0; } static int rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */ } static int rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */ } /* pru_connect2 is EOPNOTSUPP */ /* pru_control is EOPNOTSUPP */ static void rts_detach(struct socket *so) { struct rawcb *rp = sotorawcb(so); KASSERT(rp != NULL, ("rts_detach: rp == NULL")); RTSOCK_LOCK(); switch(rp->rcb_proto.sp_protocol) { case AF_INET: route_cb.ip_count--; break; case AF_INET6: route_cb.ip6_count--; break; case AF_IPX: route_cb.ipx_count--; break; } route_cb.any_count--; RTSOCK_UNLOCK(); raw_usrreqs.pru_detach(so); } static int rts_disconnect(struct socket *so) { return (raw_usrreqs.pru_disconnect(so)); } /* pru_listen is EOPNOTSUPP */ static int rts_peeraddr(struct socket *so, struct sockaddr **nam) { return (raw_usrreqs.pru_peeraddr(so, nam)); } /* pru_rcvd is EOPNOTSUPP */ /* pru_rcvoob is EOPNOTSUPP */ static int rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { return (raw_usrreqs.pru_send(so, flags, m, nam, control, td)); } /* pru_sense is null */ static int rts_shutdown(struct socket *so) { return (raw_usrreqs.pru_shutdown(so)); } static int rts_sockaddr(struct socket *so, struct sockaddr **nam) { return (raw_usrreqs.pru_sockaddr(so, nam)); } static struct pr_usrreqs route_usrreqs = { .pru_abort = rts_abort, .pru_attach = rts_attach, .pru_bind = rts_bind, .pru_connect = rts_connect, .pru_detach = rts_detach, .pru_disconnect = rts_disconnect, .pru_peeraddr = rts_peeraddr, .pru_send = rts_send, .pru_shutdown = rts_shutdown, .pru_sockaddr = rts_sockaddr, .pru_close = rts_close, }; #ifndef _SOCKADDR_UNION_DEFINED #define _SOCKADDR_UNION_DEFINED /* * The union of all possible address formats we handle. */ union sockaddr_union { struct sockaddr sa; struct sockaddr_in sin; struct sockaddr_in6 sin6; }; #endif /* _SOCKADDR_UNION_DEFINED */ static int rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred) { /* First, see if the returned address is part of the jail. */ if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) { info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; return (0); } switch (info->rti_info[RTAX_DST]->sa_family) { #ifdef INET case AF_INET: { struct in_addr ia; struct ifaddr *ifa; int found; found = 0; /* * Try to find an address on the given outgoing interface * that belongs to the jail. 
*/ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa; sa = ifa->ifa_addr; if (sa->sa_family != AF_INET) continue; ia = ((struct sockaddr_in *)sa)->sin_addr; if (prison_check_ip4(cred, &ia) == 0) { found = 1; break; } } IF_ADDR_RUNLOCK(ifp); if (!found) { /* * As a last resort return the 'default' jail address. */ ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)-> sin_addr; if (prison_get_ip4(cred, &ia) != 0) return (ESRCH); } bzero(&saun->sin, sizeof(struct sockaddr_in)); saun->sin.sin_len = sizeof(struct sockaddr_in); saun->sin.sin_family = AF_INET; saun->sin.sin_addr.s_addr = ia.s_addr; info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin; break; } #endif #ifdef INET6 case AF_INET6: { struct in6_addr ia6; struct ifaddr *ifa; int found; found = 0; /* * Try to find an address on the given outgoing interface * that belongs to the jail. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa; sa = ifa->ifa_addr; if (sa->sa_family != AF_INET6) continue; bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr, &ia6, sizeof(struct in6_addr)); if (prison_check_ip6(cred, &ia6) == 0) { found = 1; break; } } IF_ADDR_RUNLOCK(ifp); if (!found) { /* * As a last resort return the 'default' jail address. */ ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)-> sin6_addr; if (prison_get_ip6(cred, &ia6) != 0) return (ESRCH); } bzero(&saun->sin6, sizeof(struct sockaddr_in6)); saun->sin6.sin6_len = sizeof(struct sockaddr_in6); saun->sin6.sin6_family = AF_INET6; bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr)); if (sa6_recoverscope(&saun->sin6) != 0) return (ESRCH); info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6; break; } #endif default: return (ESRCH); } return (0); } /*ARGSUSED*/ static int route_output(struct mbuf *m, struct socket *so) { #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) struct rt_msghdr *rtm = NULL; struct rtentry *rt = NULL; struct radix_node_head *rnh; struct rt_addrinfo info; #ifdef INET6 struct sockaddr_storage ss; struct sockaddr_in6 *sin6; int i, rti_need_deembed = 0; #endif int len, error = 0; struct ifnet *ifp = NULL; union sockaddr_union saun; sa_family_t saf = AF_UNSPEC; #define senderr(e) { error = e; goto flush;} if (m == NULL || ((m->m_len < sizeof(long)) && (m = m_pullup(m, sizeof(long))) == NULL)) return (ENOBUFS); if ((m->m_flags & M_PKTHDR) == 0) panic("route_output"); len = m->m_pkthdr.len; if (len < sizeof(*rtm) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) { info.rti_info[RTAX_DST] = NULL; senderr(EINVAL); } R_Malloc(rtm, struct rt_msghdr *, len); if (rtm == NULL) { info.rti_info[RTAX_DST] = NULL; senderr(ENOBUFS); } m_copydata(m, 0, len, (caddr_t)rtm); if (rtm->rtm_version != RTM_VERSION) { info.rti_info[RTAX_DST] = NULL; senderr(EPROTONOSUPPORT); } rtm->rtm_pid = curproc->p_pid; bzero(&info, sizeof(info)); info.rti_addrs = rtm->rtm_addrs; /* * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6 * link-local address because rtrequest requires addresses with * embedded scope id. 
*/ if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) { info.rti_info[RTAX_DST] = NULL; senderr(EINVAL); } info.rti_flags = rtm->rtm_flags; if (info.rti_info[RTAX_DST] == NULL || info.rti_info[RTAX_DST]->sa_family >= AF_MAX || (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) senderr(EINVAL); saf = info.rti_info[RTAX_DST]->sa_family; /* * Verify that the caller has the appropriate privilege; RTM_GET * is the only operation the non-superuser is allowed. */ if (rtm->rtm_type != RTM_GET) { error = priv_check(curthread, PRIV_NET_ROUTE); if (error) senderr(error); } /* * The given gateway address may be an interface address. * For example, issuing a "route change" command on a route * entry that was created from a tunnel, and the gateway * address given is the local end point. In this case the * RTF_GATEWAY flag must be cleared or the destination will * not be reachable even though there is no error message. */ if (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) { struct route gw_ro; bzero(&gw_ro, sizeof(gw_ro)); gw_ro.ro_dst = *info.rti_info[RTAX_GATEWAY]; rtalloc_ign_fib(&gw_ro, 0, so->so_fibnum); /* * A host route through the loopback interface is * installed for each interface address. In pre-8.0 * releases the interface address of a PPP link type * is not reachable locally. This behavior is fixed as * part of the new L2/L3 redesign and rewrite work. The * signature of this interface address route is the * AF_LINK sa_family type of the rt_gateway, and the * rt_ifp has the IFF_LOOPBACK flag set. */ if (gw_ro.ro_rt != NULL && gw_ro.ro_rt->rt_gateway->sa_family == AF_LINK && gw_ro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) { info.rti_flags &= ~RTF_GATEWAY; info.rti_flags |= RTF_GWFLAG_COMPAT; } if (gw_ro.ro_rt != NULL) RTFREE(gw_ro.ro_rt); } switch (rtm->rtm_type) { struct rtentry *saved_nrt; case RTM_ADD: if (info.rti_info[RTAX_GATEWAY] == NULL) senderr(EINVAL); saved_nrt = NULL; /* support for new ARP code */ if (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK && (rtm->rtm_flags & RTF_LLDATA) != 0) { error = lla_rt_output(rtm, &info); #ifdef INET6 if (error == 0) rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; } error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt, so->so_fibnum); if (error == 0 && saved_nrt) { #ifdef INET6 rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif RT_LOCK(saved_nrt); rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &saved_nrt->rt_rmx); rtm->rtm_index = saved_nrt->rt_ifp->if_index; RT_REMREF(saved_nrt); RT_UNLOCK(saved_nrt); } break; case RTM_DELETE: saved_nrt = NULL; /* support for new ARP code */ if (info.rti_info[RTAX_GATEWAY] && (info.rti_info[RTAX_GATEWAY]->sa_family == AF_LINK) && (rtm->rtm_flags & RTF_LLDATA) != 0) { error = lla_rt_output(rtm, &info); #ifdef INET6 if (error == 0) rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; } error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt, so->so_fibnum); if (error == 0) { RT_LOCK(saved_nrt); rt = saved_nrt; goto report; } #ifdef INET6 /* rt_msg2() will not be used when RTM_DELETE fails. */ rti_need_deembed = (V_deembed_scopeid) ? 1 : 0; #endif break; case RTM_GET: case RTM_CHANGE: case RTM_LOCK: rnh = rt_tables_get_rnh(so->so_fibnum, info.rti_info[RTAX_DST]->sa_family); if (rnh == NULL) senderr(EAFNOSUPPORT); RADIX_NODE_HEAD_RLOCK(rnh); if (info.rti_info[RTAX_NETMASK] == NULL && rtm->rtm_type == RTM_GET) { /* * Provide longest prefix match for * address lookup (no mask).
* 'route -n get addr' */ rt = (struct rtentry *) rnh->rnh_matchaddr( info.rti_info[RTAX_DST], rnh); } else rt = (struct rtentry *) rnh->rnh_lookup( info.rti_info[RTAX_DST], info.rti_info[RTAX_NETMASK], rnh); if (rt == NULL) { RADIX_NODE_HEAD_RUNLOCK(rnh); senderr(ESRCH); } #ifdef RADIX_MPATH /* * for RTM_CHANGE/LOCK, if we got multipath routes, * we require users to specify a matching RTAX_GATEWAY. * * for RTM_GET, gate is optional even with multipath. * if gate == NULL the first match is returned. * (no need to call rt_mpath_matchgate if gate == NULL) */ if (rn_mpath_capable(rnh) && (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) { rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]); if (!rt) { RADIX_NODE_HEAD_RUNLOCK(rnh); senderr(ESRCH); } } #endif /* * If performing proxied L2 entry insertion, and * the actual PPP host entry is found, perform * another search to retrieve the prefix route of * the local end point of the PPP link. */ if (rtm->rtm_flags & RTF_ANNOUNCE) { struct sockaddr laddr; if (rt->rt_ifp != NULL && rt->rt_ifp->if_type == IFT_PROPVIRTUAL) { struct ifaddr *ifa; ifa = ifa_ifwithnet(info.rti_info[RTAX_DST], 1); if (ifa != NULL) rt_maskedcopy(ifa->ifa_addr, &laddr, ifa->ifa_netmask); } else rt_maskedcopy(rt->rt_ifa->ifa_addr, &laddr, rt->rt_ifa->ifa_netmask); /* * refactor rt and no lock operation necessary */ rt = (struct rtentry *)rnh->rnh_matchaddr(&laddr, rnh); if (rt == NULL) { RADIX_NODE_HEAD_RUNLOCK(rnh); senderr(ESRCH); } } RT_LOCK(rt); RT_ADDREF(rt); RADIX_NODE_HEAD_RUNLOCK(rnh); switch(rtm->rtm_type) { case RTM_GET: report: RT_LOCK_ASSERT(rt); if ((rt->rt_flags & RTF_HOST) == 0 ? jailed_without_vnet(curthread->td_ucred) : prison_if(curthread->td_ucred, rt_key(rt)) != 0) { RT_UNLOCK(rt); senderr(ESRCH); } info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_GENMASK] = 0; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { ifp = rt->rt_ifp; if (ifp) { info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; error = rtm_get_jailed(&info, ifp, rt, &saun, curthread->td_ucred); if (error != 0) { RT_UNLOCK(rt); senderr(error); } if (ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; rtm->rtm_index = ifp->if_index; } else { info.rti_info[RTAX_IFP] = NULL; info.rti_info[RTAX_IFA] = NULL; } } else if ((ifp = rt->rt_ifp) != NULL) { rtm->rtm_index = ifp->if_index; } len = rt_msg2(rtm->rtm_type, &info, NULL, NULL); if (len > rtm->rtm_msglen) { struct rt_msghdr *new_rtm; R_Malloc(new_rtm, struct rt_msghdr *, len); if (new_rtm == NULL) { RT_UNLOCK(rt); senderr(ENOBUFS); } bcopy(rtm, new_rtm, rtm->rtm_msglen); Free(rtm); rtm = new_rtm; } (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL); if (rt->rt_flags & RTF_GWFLAG_COMPAT) rtm->rtm_flags = RTF_GATEWAY | (rt->rt_flags & ~RTF_GWFLAG_COMPAT); else rtm->rtm_flags = rt->rt_flags; rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); rtm->rtm_addrs = info.rti_addrs; break; case RTM_CHANGE: /* * New gateway could require new ifaddr, ifp; * flags may also be different; ifp may be specified * by ll sockaddr when protocol address is ambiguous */ if (((rt->rt_flags & RTF_GATEWAY) && info.rti_info[RTAX_GATEWAY] != NULL) || info.rti_info[RTAX_IFP] != NULL || (info.rti_info[RTAX_IFA] != NULL && !sa_equal(info.rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) { RT_UNLOCK(rt); RADIX_NODE_HEAD_LOCK(rnh); error = rt_getifa_fib(&info, rt->rt_fibnum); /* * XXXRW: Really we should release this * reference later, but this maintains * 
historical behavior. */ if (info.rti_ifa != NULL) ifa_free(info.rti_ifa); RADIX_NODE_HEAD_UNLOCK(rnh); if (error != 0) senderr(error); RT_LOCK(rt); } if (info.rti_ifa != NULL && info.rti_ifa != rt->rt_ifa && rt->rt_ifa != NULL && rt->rt_ifa->ifa_rtrequest != NULL) { rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, &info); ifa_free(rt->rt_ifa); } if (info.rti_info[RTAX_GATEWAY] != NULL) { RT_UNLOCK(rt); RADIX_NODE_HEAD_LOCK(rnh); RT_LOCK(rt); error = rt_setgate(rt, rt_key(rt), info.rti_info[RTAX_GATEWAY]); RADIX_NODE_HEAD_UNLOCK(rnh); if (error != 0) { RT_UNLOCK(rt); senderr(error); } rt->rt_flags &= ~RTF_GATEWAY; rt->rt_flags |= (RTF_GATEWAY & info.rti_flags); } if (info.rti_ifa != NULL && info.rti_ifa != rt->rt_ifa) { ifa_ref(info.rti_ifa); rt->rt_ifa = info.rti_ifa; rt->rt_ifp = info.rti_ifp; } /* Allow some flags to be toggled on change. */ rt->rt_flags = (rt->rt_flags & ~RTF_FMASK) | (rtm->rtm_flags & RTF_FMASK); rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &rt->rt_rmx); rtm->rtm_index = rt->rt_ifp->if_index; if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info); /* FALLTHROUGH */ case RTM_LOCK: /* We don't support locks anymore */ break; } RT_UNLOCK(rt); break; default: senderr(EOPNOTSUPP); } flush: if (rtm) { if (error) rtm->rtm_errno = error; else rtm->rtm_flags |= RTF_DONE; } if (rt) /* XXX can this be true? */ RTFREE(rt); { struct rawcb *rp = NULL; /* * Check to see if we don't want our own messages. */ if ((so->so_options & SO_USELOOPBACK) == 0) { if (route_cb.any_count <= 1) { if (rtm) Free(rtm); m_freem(m); return (error); } /* There is another listener, so construct message */ rp = sotorawcb(so); } if (rtm) { #ifdef INET6 if (rti_need_deembed) { /* sin6_scope_id is recovered before sending rtm. */ sin6 = (struct sockaddr_in6 *)&ss; for (i = 0; i < RTAX_MAX; i++) { if (info.rti_info[i] == NULL) continue; if (info.rti_info[i]->sa_family != AF_INET6) continue; bcopy(info.rti_info[i], sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) bcopy(sin6, info.rti_info[i], sizeof(*sin6)); } } #endif m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); if (m->m_pkthdr.len < rtm->rtm_msglen) { m_freem(m); m = NULL; } else if (m->m_pkthdr.len > rtm->rtm_msglen) m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); } if (m) { M_SETFIB(m, so->so_fibnum); m->m_flags |= RTS_FILTER_FIB; if (rp) { /* * XXX insure we don't get a copy by * invalidating our protocol */ unsigned short family = rp->rcb_proto.sp_family; rp->rcb_proto.sp_family = 0; rt_dispatch(m, saf); rp->rcb_proto.sp_family = family; } else rt_dispatch(m, saf); } /* info.rti_info[RTAX_DST] (used above) can point inside of rtm */ if (rtm) Free(rtm); } return (error); #undef sa_equal } static void rt_setmetrics(u_long which, const struct rt_metrics *in, struct rt_metrics_lite *out) { #define metric(f, e) if (which & (f)) out->e = in->e; /* * Only these are stored in the routing entry since introduction * of tcp hostcache. The rest is ignored. */ metric(RTV_MTU, rmx_mtu); metric(RTV_WEIGHT, rmx_weight); /* Userland -> kernel timebase conversion. */ if (which & RTV_EXPIRE) out->rmx_expire = in->rmx_expire ? in->rmx_expire - time_second + time_uptime : 0; #undef metric } static void rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out) { #define metric(e) out->e = in->e; bzero(out, sizeof(*out)); metric(rmx_mtu); metric(rmx_weight); /* Kernel -> userland timebase conversion. */ out->rmx_expire = in->rmx_expire ? 
static void rt_setmetrics(u_long which, const struct rt_metrics *in, struct rt_metrics_lite *out) { #define metric(f, e) if (which & (f)) out->e = in->e; /* * Only these are stored in the routing entry since the introduction * of the tcp hostcache. The rest is ignored. */ metric(RTV_MTU, rmx_mtu); metric(RTV_WEIGHT, rmx_weight); /* Userland -> kernel timebase conversion. */ if (which & RTV_EXPIRE) out->rmx_expire = in->rmx_expire ? in->rmx_expire - time_second + time_uptime : 0; #undef metric } static void rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out) { #define metric(e) out->e = in->e; bzero(out, sizeof(*out)); metric(rmx_mtu); metric(rmx_weight); /* Kernel -> userland timebase conversion. */ out->rmx_expire = in->rmx_expire ? in->rmx_expire - time_uptime + time_second : 0; #undef metric } /* * Extract the addresses of the passed sockaddrs. * Do a little sanity checking so as to avoid bad memory references. * This data is derived straight from userland. */ static int rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) { struct sockaddr *sa; int i; for (i = 0; i < RTAX_MAX && cp < cplim; i++) { if ((rtinfo->rti_addrs & (1 << i)) == 0) continue; sa = (struct sockaddr *)cp; /* * It won't fit. */ if (cp + sa->sa_len > cplim) return (EINVAL); /* * There are no more; quit now. Any further bits set are in error: * route(1) can evidently generate such messages, and they used to * make the kernel core dump. For compatibility, if we see one, * point to a safe address instead. */ if (sa->sa_len == 0) { rtinfo->rti_info[i] = &sa_zero; return (0); /* should be EINVAL but for compat */ } /* accept it */ #ifdef INET6 if (sa->sa_family == AF_INET6) sa6_embedscope((struct sockaddr_in6 *)sa, V_ip6_use_defzone); #endif rtinfo->rti_info[i] = sa; cp += SA_SIZE(sa); } return (0); }
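rt_xaddrs() is the kernel-side parser for the sockaddr array that userland appends after a struct rt_msghdr; a consumer of routing messages performs the mirror operation. A minimal userland sketch follows; the ROUNDUP macro open-codes the sizeof(long) alignment that SA_SIZE() applies in the kernel, which is the conventional approach in route(4) consumers:

#include <sys/types.h>
#include <sys/socket.h>
#include <net/route.h>

#define ROUNDUP(n) ((n) > 0 ? \
    (((n) + sizeof(long) - 1) & ~(sizeof(long) - 1)) : sizeof(long))

/* Fill rti_info[] with pointers into the sockaddrs trailing *rtm. */
static void
parse_addrs(struct rt_msghdr *rtm, struct sockaddr *rti_info[RTAX_MAX])
{
    struct sockaddr *sa;
    char *cp = (char *)(rtm + 1);
    int i;

    for (i = 0; i < RTAX_MAX; i++) {
        if ((rtm->rtm_addrs & (1 << i)) == 0) {
            rti_info[i] = NULL;
            continue;
        }
        sa = (struct sockaddr *)cp;
        rti_info[i] = sa;
        cp += ROUNDUP(sa->sa_len);
    }
}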
/* * Used by the routing socket. */ static struct mbuf * rt_msg1(int type, struct rt_addrinfo *rtinfo) { struct rt_msghdr *rtm; struct mbuf *m; int i; struct sockaddr *sa; #ifdef INET6 struct sockaddr_storage ss; struct sockaddr_in6 *sin6; #endif int len, dlen; switch (type) { case RTM_DELADDR: case RTM_NEWADDR: len = sizeof(struct ifa_msghdr); break; case RTM_DELMADDR: case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; case RTM_IFINFO: len = sizeof(struct if_msghdr); break; case RTM_IFANNOUNCE: case RTM_IEEE80211: len = sizeof(struct if_announcemsghdr); break; default: len = sizeof(struct rt_msghdr); } /* XXXGL: can we use MJUMPAGESIZE cluster here? */ KASSERT(len <= MCLBYTES, ("%s: message too big", __func__)); if (len > MHLEN) m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); else m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (m); m->m_pkthdr.len = m->m_len = len; rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); for (i = 0; i < RTAX_MAX; i++) { if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); #ifdef INET6 if (V_deembed_scopeid && sa->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)&ss; bcopy(sa, sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) sa = (struct sockaddr *)sin6; } #endif m_copyback(m, len, dlen, (caddr_t)sa); len += dlen; } if (m->m_pkthdr.len != len) { m_freem(m); return (NULL); } rtm->rtm_msglen = len; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; return (m); } /* * Used by the sysctl code and routing socket. */ static int rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w) { int i; int len, dlen, second_time = 0; caddr_t cp0; #ifdef INET6 struct sockaddr_storage ss; struct sockaddr_in6 *sin6; #endif rtinfo->rti_addrs = 0; again: switch (type) { case RTM_DELADDR: case RTM_NEWADDR: if (w != NULL && w->w_op == NET_RT_IFLISTL) { #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) len = sizeof(struct ifa_msghdrl32); else #endif len = sizeof(struct ifa_msghdrl); } else len = sizeof(struct ifa_msghdr); break; case RTM_IFINFO: #ifdef COMPAT_FREEBSD32 if (w != NULL && w->w_req->flags & SCTL_MASK32) { if (w->w_op == NET_RT_IFLISTL) len = sizeof(struct if_msghdrl32); else len = sizeof(struct if_msghdr32); break; } #endif if (w != NULL && w->w_op == NET_RT_IFLISTL) len = sizeof(struct if_msghdrl); else len = sizeof(struct if_msghdr); break; case RTM_NEWMADDR: len = sizeof(struct ifma_msghdr); break; default: len = sizeof(struct rt_msghdr); } cp0 = cp; if (cp0) cp += len; for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa; if ((sa = rtinfo->rti_info[i]) == NULL) continue; rtinfo->rti_addrs |= (1 << i); dlen = SA_SIZE(sa); if (cp) { #ifdef INET6 if (V_deembed_scopeid && sa->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)&ss; bcopy(sa, sin6, sizeof(*sin6)); if (sa6_recoverscope(sin6) == 0) sa = (struct sockaddr *)sin6; } #endif bcopy((caddr_t)sa, cp, (unsigned)dlen); cp += dlen; } len += dlen; } len = ALIGN(len); if (cp == NULL && w != NULL && !second_time) { struct walkarg *rw = w; if (rw->w_req) { if (rw->w_tmemsize < len) { if (rw->w_tmem) free(rw->w_tmem, M_RTABLE); rw->w_tmem = (caddr_t) malloc(len, M_RTABLE, M_NOWAIT); if (rw->w_tmem) rw->w_tmemsize = len; } if (rw->w_tmem) { cp = rw->w_tmem; second_time = 1; goto again; } } } if (cp) { struct rt_msghdr *rtm = (struct rt_msghdr *)cp0; rtm->rtm_version = RTM_VERSION; rtm->rtm_type = type; rtm->rtm_msglen = len; } return (len); } /* * This routine is called to generate a message from the routing * socket indicating that a redirect has occurred, that a routing lookup * has failed, or that a protocol has detected timeouts to a particular * destination. */ void rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error, int fibnum) { struct rt_msghdr *rtm; struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; if (route_cb.any_count == 0) return; m = rt_msg1(type, rtinfo); if (m == NULL) return; if (fibnum != RT_ALL_FIBS) { KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out " "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs)); M_SETFIB(m, fibnum); m->m_flags |= RTS_FILTER_FIB; } rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; rtm->rtm_errno = error; rtm->rtm_addrs = rtinfo->rti_addrs; rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); } void rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) { rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS); }
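rt_missmsg() and its siblings feed what a 'route monitor'-style listener observes. As a usage illustration, the hedged sketch below reads the message stream from a routing socket and switches on rtm_type; the 2048-byte buffer and the printed strings are arbitrary choices, the rest is the standard route(4) interface:

#include <sys/types.h>
#include <sys/socket.h>
#include <net/route.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
    char buf[2048];
    struct rt_msghdr *rtm = (struct rt_msghdr *)buf;
    ssize_t n;
    int s;

    s = socket(PF_ROUTE, SOCK_RAW, 0);
    if (s == -1)
        err(1, "socket(PF_ROUTE)");
    for (;;) {
        n = read(s, buf, sizeof(buf));
        if (n == -1)
            err(1, "read");
        if (rtm->rtm_version != RTM_VERSION)
            continue;
        switch (rtm->rtm_type) {
        case RTM_ADD:
        case RTM_DELETE:
        case RTM_CHANGE:
            printf("route change: type %u errno %d flags %#x\n",
                rtm->rtm_type, rtm->rtm_errno, rtm->rtm_flags);
            break;
        case RTM_IFINFO:
            printf("interface status change\n");
            break;
        case RTM_NEWADDR:
        case RTM_DELADDR:
            printf("address added/removed\n");
            break;
        default:
            printf("rtm_type %u (%zd bytes)\n", rtm->rtm_type, n);
        }
    }
}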
/* * This routine is called to generate a message from the routing * socket indicating that the status of a network interface has changed. */ void rt_ifmsg(struct ifnet *ifp) { struct if_msghdr *ifm; struct mbuf *m; struct rt_addrinfo info; if (route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); m = rt_msg1(RTM_IFINFO, &info); if (m == NULL) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_data = ifp->if_data; ifm->ifm_addrs = 0; rt_dispatch(m, AF_UNSPEC); } /* - * This is called to generate messages from the routing socket - * indicating a network interface has had addresses associated with it. - * if we ever reverse the logic and replace messages TO the routing - * socket indicate a request to configure interfaces, then it will - * be unnecessary as the routing socket will automatically generate - * copies of it. + * Announce interface address arrival/withdrawal. + * Please do not call directly; use rt_addrmsg(). + * Input data is assumed to be valid. + * Returns 0 on success. */ -void -rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt, - int fibnum) +int +rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum) { struct rt_addrinfo info; - struct sockaddr *sa = NULL; - int pass; - struct mbuf *m = NULL; + struct sockaddr *sa; + int ncmd; + struct mbuf *m; + struct ifa_msghdr *ifam; struct ifnet *ifp = ifa->ifa_ifp; - KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE, - ("unexpected cmd %u", cmd)); -#if defined(INET) || defined(INET6) -#ifdef SCTP - /* - * notify the SCTP stack - * this will only get called when an address is added/deleted - * XXX pass the ifaddr struct instead if ifa->ifa_addr... - */ - sctp_addr_change(ifa, cmd); -#endif /* SCTP */ -#endif if (route_cb.any_count == 0) - return; - for (pass = 1; pass < 3; pass++) { - bzero((caddr_t)&info, sizeof(info)); - if ((cmd == RTM_ADD && pass == 1) || - (cmd == RTM_DELETE && pass == 2)) { - struct ifa_msghdr *ifam; - int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR; - - info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; - info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; - info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; - info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; - if ((m = rt_msg1(ncmd, &info)) == NULL) - continue; - ifam = mtod(m, struct ifa_msghdr *); - ifam->ifam_index = ifp->if_index; - ifam->ifam_metric = ifa->ifa_metric; - ifam->ifam_flags = ifa->ifa_flags; - ifam->ifam_addrs = info.rti_addrs; - } - if ((cmd == RTM_ADD && pass == 2) || - (cmd == RTM_DELETE && pass == 1)) { - struct rt_msghdr *rtm; + return (0); - if (rt == NULL) - continue; - info.rti_info[RTAX_NETMASK] = rt_mask(rt); - info.rti_info[RTAX_DST] = sa = rt_key(rt); - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - if ((m = rt_msg1(cmd, &info)) == NULL) - continue; - rtm = mtod(m, struct rt_msghdr *); - rtm->rtm_index = ifp->if_index; - rtm->rtm_flags |= rt->rt_flags; - rtm->rtm_errno = error; - rtm->rtm_addrs = info.rti_addrs; - } - if (fibnum != RT_ALL_FIBS) { - KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: " - "fibnum out of range 0 <= %d < %d", __func__, - fibnum, rt_numfibs)); - M_SETFIB(m, fibnum); - m->m_flags |= RTS_FILTER_FIB; - } - rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); + ncmd = cmd == RTM_ADD ?
RTM_NEWADDR : RTM_DELADDR; + + bzero((caddr_t)&info, sizeof(info)); + info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr; + info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr; + info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; + info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; + if ((m = rt_msg1(ncmd, &info)) == NULL) + return (ENOBUFS); + ifam = mtod(m, struct ifa_msghdr *); + ifam->ifam_index = ifp->if_index; + ifam->ifam_metric = ifa->ifa_metric; + ifam->ifam_flags = ifa->ifa_flags; + ifam->ifam_addrs = info.rti_addrs; + + if (fibnum != RT_ALL_FIBS) { + M_SETFIB(m, fibnum); + m->m_flags |= RTS_FILTER_FIB; } + + rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); + + return (0); } -void -rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) +/* + * Announce route addition/removal. + * Please do not call directly; use rt_routemsg(). + * Note that @rt data MAY be inconsistent/invalid: + * if some userland app sends us an "invalid" route message (invalid mask, + * no dst, wrong address families, etc.), we need to pass it back + * to the app (and any other rtsock consumers) with the rtm_errno field + * set to a non-zero value. + * + * Returns 0 on success. + */ +int +rtsock_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt, + int fibnum) { + struct rt_addrinfo info; + struct sockaddr *sa; + struct mbuf *m; + struct rt_msghdr *rtm; + + if (route_cb.any_count == 0) + return (0); - rt_newaddrmsg_fib(cmd, ifa, error, rt, RT_ALL_FIBS); + bzero((caddr_t)&info, sizeof(info)); + info.rti_info[RTAX_NETMASK] = rt_mask(rt); + info.rti_info[RTAX_DST] = sa = rt_key(rt); + info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; + if ((m = rt_msg1(cmd, &info)) == NULL) + return (ENOBUFS); + rtm = mtod(m, struct rt_msghdr *); + rtm->rtm_index = ifp->if_index; + rtm->rtm_flags |= rt->rt_flags; + rtm->rtm_errno = error; + rtm->rtm_addrs = info.rti_addrs; + + if (fibnum != RT_ALL_FIBS) { + M_SETFIB(m, fibnum); + m->m_flags |= RTS_FILTER_FIB; + } + + rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC); + + return (0); } + /* * This is the analogue to rt_newaddrmsg, which performs the same * function but for multicast group memberships. This is easier since * there is no route state to worry about. */ void rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) { struct rt_addrinfo info; struct mbuf *m = NULL; struct ifnet *ifp = ifma->ifma_ifp; struct ifma_msghdr *ifmam; if (route_cb.any_count == 0) return; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_IFA] = ifma->ifma_addr; info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL; /* * If a link-layer address is present, present it as a ``gateway'' * (similarly to how ARP entries, e.g., are presented). */ info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr; m = rt_msg1(cmd, &info); if (m == NULL) return; ifmam = mtod(m, struct ifma_msghdr *); KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n", __func__)); ifmam->ifmam_index = ifp->if_index; ifmam->ifmam_addrs = info.rti_addrs; rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC); }
static struct mbuf * rt_makeifannouncemsg(struct ifnet *ifp, int type, int what, struct rt_addrinfo *info) { struct if_announcemsghdr *ifan; struct mbuf *m; if (route_cb.any_count == 0) return (NULL); bzero((caddr_t)info, sizeof(*info)); m = rt_msg1(type, info); if (m != NULL) { ifan = mtod(m, struct if_announcemsghdr *); ifan->ifan_index = ifp->if_index; strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name)); ifan->ifan_what = what; } return (m); } /* * This is called to generate routing socket messages indicating * IEEE80211 wireless events. * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way. */ void rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len) { struct mbuf *m; struct rt_addrinfo info; m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info); if (m != NULL) { /* * Append the ieee80211 data. Try to stick it in the * mbuf containing the ifannounce msg; otherwise allocate * a new mbuf and append. * * NB: we assume m is a single mbuf. */ if (data_len > M_TRAILINGSPACE(m)) { struct mbuf *n = m_get(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; } bcopy(data, mtod(n, void *), data_len); n->m_len = data_len; m->m_next = n; } else if (data_len > 0) { bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len); m->m_len += data_len; } if (m->m_flags & M_PKTHDR) m->m_pkthdr.len += data_len; mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len; rt_dispatch(m, AF_UNSPEC); } } /* * This is called to generate routing socket messages indicating * network interface arrival and departure. */ void rt_ifannouncemsg(struct ifnet *ifp, int what) { struct mbuf *m; struct rt_addrinfo info; m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info); if (m != NULL) rt_dispatch(m, AF_UNSPEC); } static void rt_dispatch(struct mbuf *m, sa_family_t saf) { struct m_tag *tag; /* * Preserve the family from the sockaddr, if any, in an m_tag for * use when injecting the mbuf into the routing socket buffer from * the netisr. */ if (saf != AF_UNSPEC) { tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short), M_NOWAIT); if (tag == NULL) { m_freem(m); return; } *(unsigned short *)(tag + 1) = saf; m_tag_prepend(m, tag); } #ifdef VIMAGE if (V_loif) m->m_pkthdr.rcvif = V_loif; else { m_freem(m); return; } #endif netisr_queue(NETISR_ROUTE, m); /* mbuf is freed on failure. */ }
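rt_ifannouncemsg() is the producer of the RTM_IFANNOUNCE messages that tools such as devd react to. A minimal consumer sketch, assuming nothing beyond the if_announcemsghdr layout from <net/if.h> and the route(4) message stream:

#include <sys/types.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/route.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
    char buf[2048];
    struct if_announcemsghdr *ifan = (struct if_announcemsghdr *)buf;
    ssize_t n;
    int s;

    s = socket(PF_ROUTE, SOCK_RAW, 0);
    if (s == -1)
        err(1, "socket(PF_ROUTE)");
    for (;;) {
        n = read(s, buf, sizeof(buf));
        if (n == -1)
            err(1, "read");
        if (ifan->ifan_version != RTM_VERSION ||
            ifan->ifan_type != RTM_IFANNOUNCE)
            continue;       /* not an interface announcement */
        printf("%s: %s\n", ifan->ifan_name,
            ifan->ifan_what == IFAN_ARRIVAL ? "arrived" : "departed");
    }
}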
/* * This is used in dumping the kernel table via sysctl(). */ static int sysctl_dumpentry(struct radix_node *rn, void *vw) { struct walkarg *w = vw; struct rtentry *rt = (struct rtentry *)rn; int error = 0, size; struct rt_addrinfo info; if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) return (0); if ((rt->rt_flags & RTF_HOST) == 0 ? jailed_without_vnet(w->w_req->td->td_ucred) : prison_if(w->w_req->td->td_ucred, rt_key(rt)) != 0) return (0); bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_GENMASK] = 0; if (rt->rt_ifp) { info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr; info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; if (rt->rt_ifp->if_flags & IFF_POINTOPOINT) info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; } size = rt_msg2(RTM_GET, &info, NULL, w); if (w->w_req && w->w_tmem) { struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; if (rt->rt_flags & RTF_GWFLAG_COMPAT) rtm->rtm_flags = RTF_GATEWAY | (rt->rt_flags & ~RTF_GWFLAG_COMPAT); else rtm->rtm_flags = rt->rt_flags; /* * Admittedly an ugly hack: rtm_fmask is abused here to carry * the packet count. */ rtm->rtm_fmask = rt->rt_rmx.rmx_pksent; rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); return (error); } return (error); } #ifdef COMPAT_FREEBSD32 static void copy_ifdata32(struct if_data *src, struct if_data32 *dst) { bzero(dst, sizeof(*dst)); CP(*src, *dst, ifi_type); CP(*src, *dst, ifi_physical); CP(*src, *dst, ifi_addrlen); CP(*src, *dst, ifi_hdrlen); CP(*src, *dst, ifi_link_state); CP(*src, *dst, ifi_vhid); CP(*src, *dst, ifi_baudrate_pf); dst->ifi_datalen = sizeof(struct if_data32); CP(*src, *dst, ifi_mtu); CP(*src, *dst, ifi_metric); CP(*src, *dst, ifi_baudrate); CP(*src, *dst, ifi_ipackets); CP(*src, *dst, ifi_ierrors); CP(*src, *dst, ifi_opackets); CP(*src, *dst, ifi_oerrors); CP(*src, *dst, ifi_collisions); CP(*src, *dst, ifi_ibytes); CP(*src, *dst, ifi_obytes); CP(*src, *dst, ifi_imcasts); CP(*src, *dst, ifi_omcasts); CP(*src, *dst, ifi_iqdrops); CP(*src, *dst, ifi_noproto); CP(*src, *dst, ifi_hwassist); CP(*src, *dst, ifi_epoch); TV_CP(*src, *dst, ifi_lastchange); } #endif static int sysctl_iflist_ifml(struct ifnet *ifp, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdrl *ifm; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdrl32 *ifm32; ifm32 = (struct if_msghdrl32 *)w->w_tmem; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; ifm32->_ifm_spare1 = 0; ifm32->ifm_len = sizeof(*ifm32); ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data); copy_ifdata32(&ifp->if_data, &ifm32->ifm_data); /* Fixup if_data carp(4) vhid. */ if (carp_get_vhid_p != NULL) ifm32->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len)); } #endif ifm = (struct if_msghdrl *)w->w_tmem; ifm->ifm_addrs = info->rti_addrs; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_index = ifp->if_index; ifm->_ifm_spare1 = 0; ifm->ifm_len = sizeof(*ifm); ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data); ifm->ifm_data = ifp->if_data; /* Fixup if_data carp(4) vhid.
*/ if (carp_get_vhid_p != NULL) ifm->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } static int sysctl_iflist_ifm(struct ifnet *ifp, struct rt_addrinfo *info, struct walkarg *w, int len) { struct if_msghdr *ifm; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct if_msghdr32 *ifm32; ifm32 = (struct if_msghdr32 *)w->w_tmem; ifm32->ifm_addrs = info->rti_addrs; ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm32->ifm_index = ifp->if_index; copy_ifdata32(&ifp->if_data, &ifm32->ifm_data); /* Fixup if_data carp(4) vhid. */ if (carp_get_vhid_p != NULL) ifm32->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm32, len)); } #endif ifm = (struct if_msghdr *)w->w_tmem; ifm->ifm_addrs = info->rti_addrs; ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags; ifm->ifm_index = ifp->if_index; ifm->ifm_data = ifp->if_data; /* Fixup if_data carp(4) vhid. */ if (carp_get_vhid_p != NULL) ifm->ifm_data.ifi_vhid = (*carp_get_vhid_p)(ifp->if_addr); return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len)); } static int sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info, struct walkarg *w, int len) { struct ifa_msghdrl *ifam; #ifdef COMPAT_FREEBSD32 if (w->w_req->flags & SCTL_MASK32) { struct ifa_msghdrl32 *ifam32; ifam32 = (struct ifa_msghdrl32 *)w->w_tmem; ifam32->ifam_addrs = info->rti_addrs; ifam32->ifam_flags = ifa->ifa_flags; ifam32->ifam_index = ifa->ifa_ifp->if_index; ifam32->_ifam_spare1 = 0; ifam32->ifam_len = sizeof(*ifam32); ifam32->ifam_data_off = offsetof(struct ifa_msghdrl32, ifam_data); ifam32->ifam_metric = ifa->ifa_metric; bzero(&ifam32->ifam_data, sizeof(ifam32->ifam_data)); ifam32->ifam_data.ifi_datalen = sizeof(struct if_data32); ifam32->ifam_data.ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets); ifam32->ifam_data.ifi_opackets = counter_u64_fetch(ifa->ifa_opackets); ifam32->ifam_data.ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes); ifam32->ifam_data.ifi_obytes = counter_u64_fetch(ifa->ifa_obytes); /* Fixup if_data carp(4) vhid. */ if (carp_get_vhid_p != NULL) ifam32->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa); return (SYSCTL_OUT(w->w_req, (caddr_t)ifam32, len)); } #endif ifam = (struct ifa_msghdrl *)w->w_tmem; ifam->ifam_addrs = info->rti_addrs; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->_ifam_spare1 = 0; ifam->ifam_len = sizeof(*ifam); ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data); ifam->ifam_metric = ifa->ifa_metric; bzero(&ifam->ifam_data, sizeof(ifam->ifam_data)); ifam->ifam_data.ifi_datalen = sizeof(struct if_data); ifam->ifam_data.ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets); ifam->ifam_data.ifi_opackets = counter_u64_fetch(ifa->ifa_opackets); ifam->ifam_data.ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes); ifam->ifam_data.ifi_obytes = counter_u64_fetch(ifa->ifa_obytes); /* Fixup if_data carp(4) vhid. 
*/ if (carp_get_vhid_p != NULL) ifam->ifam_data.ifi_vhid = (*carp_get_vhid_p)(ifa); return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } static int sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info, struct walkarg *w, int len) { struct ifa_msghdr *ifam; ifam = (struct ifa_msghdr *)w->w_tmem; ifam->ifam_addrs = info->rti_addrs; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_index = ifa->ifa_ifp->if_index; ifam->ifam_metric = ifa->ifa_metric; return (SYSCTL_OUT(w->w_req, w->w_tmem, len)); } static int sysctl_iflist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifaddr *ifa; struct rt_addrinfo info; int len, error = 0; bzero((caddr_t)&info, sizeof(info)); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; IF_ADDR_RLOCK(ifp); ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa->ifa_addr; len = rt_msg2(RTM_IFINFO, &info, NULL, w); info.rti_info[RTAX_IFP] = NULL; if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) error = sysctl_iflist_ifml(ifp, &info, w, len); else error = sysctl_iflist_ifm(ifp, &info, w, len); if (error) goto done; } while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) { if (af && af != ifa->ifa_addr->sa_family) continue; if (prison_if(w->w_req->td->td_ucred, ifa->ifa_addr) != 0) continue; info.rti_info[RTAX_IFA] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; len = rt_msg2(RTM_NEWADDR, &info, NULL, w); if (w->w_req && w->w_tmem) { if (w->w_op == NET_RT_IFLISTL) error = sysctl_iflist_ifaml(ifa, &info, w, len); else error = sysctl_iflist_ifam(ifa, &info, w, len); if (error) goto done; } } IF_ADDR_RUNLOCK(ifp); info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = info.rti_info[RTAX_BRD] = NULL; } done: if (ifp != NULL) IF_ADDR_RUNLOCK(ifp); IFNET_RUNLOCK_NOSLEEP(); return (error); } static int sysctl_ifmalist(int af, struct walkarg *w) { struct ifnet *ifp; struct ifmultiaddr *ifma; struct rt_addrinfo info; int len, error = 0; struct ifaddr *ifa; bzero((caddr_t)&info, sizeof(info)); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (af && af != ifma->ifma_addr->sa_family) continue; if (prison_if(w->w_req->td->td_ucred, ifma->ifma_addr) != 0) continue; info.rti_info[RTAX_IFA] = ifma->ifma_addr; info.rti_info[RTAX_GATEWAY] = (ifma->ifma_addr->sa_family != AF_LINK) ? ifma->ifma_lladdr : NULL; len = rt_msg2(RTM_NEWMADDR, &info, NULL, w); if (w->w_req && w->w_tmem) { struct ifma_msghdr *ifmam; ifmam = (struct ifma_msghdr *)w->w_tmem; ifmam->ifmam_index = ifma->ifma_ifp->if_index; ifmam->ifmam_flags = 0; ifmam->ifmam_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, w->w_tmem, len); if (error) { IF_ADDR_RUNLOCK(ifp); goto done; } } } IF_ADDR_RUNLOCK(ifp); } done: IFNET_RUNLOCK_NOSLEEP(); return (error); } static int sysctl_rtsock(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1; u_int namelen = arg2; struct radix_node_head *rnh = NULL; /* silence compiler. */ int i, lim, error = EINVAL; int fib = 0; u_char af; struct walkarg w; name++; namelen--; if (req->newptr) return (EPERM); if (name[1] == NET_RT_DUMP) { if (namelen == 3) fib = req->td->td_proc->p_fibnum; else if (namelen == 4) fib = (name[3] == RT_ALL_FIBS) ? req->td->td_proc->p_fibnum : name[3]; else return ((namelen < 3) ? EISDIR : ENOTDIR);
if (fib < 0 || fib >= rt_numfibs) return (EINVAL); } else if (namelen != 3) return ((namelen < 3) ? EISDIR : ENOTDIR); af = name[0]; if (af > AF_MAX) return (EINVAL); bzero(&w, sizeof(w)); w.w_op = name[1]; w.w_arg = name[2]; w.w_req = req; error = sysctl_wire_old_buffer(req, 0); if (error) return (error); switch (w.w_op) { case NET_RT_DUMP: case NET_RT_FLAGS: if (af == 0) { /* dump all tables */ i = 1; lim = AF_MAX; } else /* dump only one table */ i = lim = af; /* * Take care of llinfo entries; the caller must * specify an AF. */ if (w.w_op == NET_RT_FLAGS && (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) { if (af != 0) error = lltable_sysctl_dumparp(af, w.w_req); else error = EINVAL; break; } /* * Take care of routing entries. */ for (error = 0; error == 0 && i <= lim; i++) { rnh = rt_tables_get_rnh(fib, i); if (rnh != NULL) { RADIX_NODE_HEAD_RLOCK(rnh); error = rnh->rnh_walktree(rnh, sysctl_dumpentry, &w); RADIX_NODE_HEAD_RUNLOCK(rnh); } else if (af != 0) error = EAFNOSUPPORT; } break; case NET_RT_IFLIST: case NET_RT_IFLISTL: error = sysctl_iflist(af, &w); break; case NET_RT_IFMALIST: error = sysctl_ifmalist(af, &w); break; } if (w.w_tmem) free(w.w_tmem, M_RTABLE); return (error); } static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, ""); /* * Definitions of protocols supported in the ROUTE domain. */ static struct domain routedomain; /* or at least forward */ static struct protosw routesw[] = { { .pr_type = SOCK_RAW, .pr_domain = &routedomain, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_output = route_output, .pr_ctlinput = raw_ctlinput, .pr_init = raw_init, .pr_usrreqs = &route_usrreqs } }; static struct domain routedomain = { .dom_family = PF_ROUTE, .dom_name = "route", .dom_protosw = routesw, .dom_protoswNPROTOSW = &routesw[sizeof(routesw)/sizeof(routesw[0])] }; VNET_DOMAIN_SET(route);
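sysctl_rtsock() is reached through the net.route sysctl tree rather than through a socket, and the classic consumer pattern is the size-then-fill double call. A hedged sketch follows; mib[3] = 0 requests every address family, NET_RT_DUMP selects the table dump, and the trailing 0 is the unused w_arg:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/route.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
    int mib[6] = { CTL_NET, PF_ROUTE, 0, 0, NET_RT_DUMP, 0 };
    size_t needed;
    char *buf, *next, *lim;
    struct rt_msghdr *rtm;

    /* First call sizes the buffer, second fills it. */
    if (sysctl(mib, 6, NULL, &needed, NULL, 0) == -1)
        err(1, "sysctl: sizing");
    if ((buf = malloc(needed)) == NULL)
        err(1, "malloc");
    if (sysctl(mib, 6, buf, &needed, NULL, 0) == -1)
        err(1, "sysctl: dump");

    lim = buf + needed;
    for (next = buf; next < lim; next += rtm->rtm_msglen) {
        rtm = (struct rt_msghdr *)next;
        printf("route entry: flags %#x, %d bytes\n",
            rtm->rtm_flags, rtm->rtm_msglen);
    }
    free(buf);
    return (0);
}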
diff --git a/sys/netinet6/ip6_mroute.h b/sys/netinet6/ip6_mroute.h index 7fe017b90107..d7b0014da12e 100644 --- a/sys/netinet6/ip6_mroute.h +++ b/sys/netinet6/ip6_mroute.h @@ -1,265 +1,265 @@ /*- * Copyright (C) 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_mroute.h,v 1.19 2001/06/14 06:12:55 suz Exp $ * $FreeBSD$ */ /* BSDI ip_mroute.h,v 2.5 1996/10/11 16:01:48 pjd Exp */ /* * Definitions for IP multicast forwarding. * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. * Modified by Ajit Thyagarajan, PARC, August 1993. * Modified by Ajit Thyagarajan, PARC, August 1994. * Modified by Ahmed Helmy, USC, September 1996. * * MROUTING Revision: 1.2 */ #ifndef _NETINET6_IP6_MROUTE_H_ #define _NETINET6_IP6_MROUTE_H_ /* * Multicast Routing set/getsockopt commands. */ #ifdef _KERNEL #define MRT6_OINIT 100 /* initialize forwarder (omrt6msg) */ #endif #define MRT6_DONE 101 /* shut down forwarder */ #define MRT6_ADD_MIF 102 /* add multicast interface */ #define MRT6_DEL_MIF 103 /* delete multicast interface */ #define MRT6_ADD_MFC 104 /* insert forwarding cache entry */ #define MRT6_DEL_MFC 105 /* delete forwarding cache entry */ #define MRT6_PIM 107 /* enable pim code */ #define MRT6_INIT 108 /* initialize forwarder (mrt6msg) */ #if BSD >= 199103 #define GET_TIME(t) microtime(&t) #elif defined(sun) #define GET_TIME(t) uniqtime(&t) #else #define GET_TIME(t) ((t) = time) #endif /* * Types and macros for handling bitmaps with one bit per multicast interface. */ typedef u_short mifi_t; /* type of a mif index */ #define MAXMIFS 64 #ifndef IF_SETSIZE #define IF_SETSIZE 256 #endif typedef u_int32_t if_mask; #define NIFBITS (sizeof(if_mask) * NBBY) /* bits per mask */ #ifndef howmany #define howmany(x, y) (((x) + ((y) - 1)) / (y)) #endif typedef struct if_set { if_mask ifs_bits[howmany(IF_SETSIZE, NIFBITS)]; } if_set; #define IF_SET(n, p) ((p)->ifs_bits[(n)/NIFBITS] |= (1 << ((n) % NIFBITS))) #define IF_CLR(n, p) ((p)->ifs_bits[(n)/NIFBITS] &= ~(1 << ((n) % NIFBITS))) #define IF_ISSET(n, p) ((p)->ifs_bits[(n)/NIFBITS] & (1 << ((n) % NIFBITS))) #define IF_COPY(f, t) bcopy(f, t, sizeof(*(f))) #define IF_ZERO(p) bzero(p, sizeof(*(p))) /* * Argument structure for MRT6_ADD_MIF. */ struct mif6ctl { mifi_t mif6c_mifi; /* the index of the mif to be added */ u_char mif6c_flags; /* MIFF_ flags defined below */ u_short mif6c_pifi; /* the index of the physical IF */ }; #define MIFF_REGISTER 0x1 /* mif represents a register end-point */ /* * Argument structure for MRT6_ADD_MFC and MRT6_DEL_MFC. */ struct mf6cctl { struct sockaddr_in6 mf6cc_origin; /* IPv6 origin of mcasts */ struct sockaddr_in6 mf6cc_mcastgrp; /* multicast group associated */ mifi_t mf6cc_parent; /* incoming ifindex */ struct if_set mf6cc_ifset; /* set of forwarding ifs */ };
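The mif6ctl and mf6cctl structures above are the payloads a multicast routing daemon hands to setsockopt() on a raw ICMPv6 socket after MRT6_INIT. The sketch below is a hedged outline of that call sequence; the physical interface index 2, the mif numbering, and the elided origin/group addresses are placeholder assumptions, and a real daemon would add every mif before referencing it in an mf6cc_ifset:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet6/ip6_mroute.h>
#include <err.h>
#include <string.h>

int
main(void)
{
    struct mif6ctl mif;
    struct mf6cctl mfc;
    int s, on = 1;

    s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
    if (s == -1)
        err(1, "socket");
    if (setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on)) == -1)
        err(1, "MRT6_INIT");

    /* Register physical interface #2 (assumed ifindex) as mif 0. */
    memset(&mif, 0, sizeof(mif));
    mif.mif6c_mifi = 0;
    mif.mif6c_pifi = 2;
    if (setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MIF, &mif, sizeof(mif)) == -1)
        err(1, "MRT6_ADD_MIF");

    /* Forward (src, group) arriving on mif 0 out the mifs in the set. */
    memset(&mfc, 0, sizeof(mfc));
    mfc.mf6cc_origin.sin6_family = AF_INET6;
    mfc.mf6cc_origin.sin6_len = sizeof(struct sockaddr_in6);
    mfc.mf6cc_mcastgrp.sin6_family = AF_INET6;
    mfc.mf6cc_mcastgrp.sin6_len = sizeof(struct sockaddr_in6);
    /* ... fill in origin and group addresses here ... */
    mfc.mf6cc_parent = 0;
    IF_ZERO(&mfc.mf6cc_ifset);
    IF_SET(1, &mfc.mf6cc_ifset);    /* mif 1, added the same way as mif 0 */
    if (setsockopt(s, IPPROTO_IPV6, MRT6_ADD_MFC, &mfc, sizeof(mfc)) == -1)
        err(1, "MRT6_ADD_MFC");

    /* ... run the upcall loop, then tear down: */
    setsockopt(s, IPPROTO_IPV6, MRT6_DONE, NULL, 0);
    return (0);
}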
/* * The kernel's multicast routing statistics. */ struct mrt6stat { uint64_t mrt6s_mfc_lookups; /* # forw. cache hash table hits */ uint64_t mrt6s_mfc_misses; /* # forw. cache hash table misses */ uint64_t mrt6s_upcalls; /* # calls to multicast routing daemon */ uint64_t mrt6s_no_route; /* no route for packet's origin */ uint64_t mrt6s_bad_tunnel; /* malformed tunnel options */ uint64_t mrt6s_cant_tunnel; /* no room for tunnel options */ uint64_t mrt6s_wrong_if; /* arrived on wrong interface */ uint64_t mrt6s_upq_ovflw; /* upcall Q overflow */ uint64_t mrt6s_cache_cleanups; /* # entries with no upcalls */ uint64_t mrt6s_drop_sel; /* pkts dropped selectively */ uint64_t mrt6s_q_overflow; /* pkts dropped - Q overflow */ uint64_t mrt6s_pkt2large; /* pkts dropped - size > BKT SIZE */ uint64_t mrt6s_upq_sockfull; /* upcalls dropped - socket full */ }; #ifdef MRT6_OINIT /* * Struct used to communicate from kernel to multicast router. * Note the convenient similarity to an IPv6 header. * XXX old version, superseded by mrt6msg. */ struct omrt6msg { u_long unused1; u_char im6_msgtype; /* what type of message */ u_char im6_mbz; /* must be zero */ u_char im6_mif; /* mif rec'd on */ u_char unused2; struct in6_addr im6_src, im6_dst; }; #endif /* * Structure used to communicate from kernel to multicast router. * We'll overlay the structure onto an MLD header (not an IPv6 header * like igmpmsg{} used for the IPv4 implementation). This is because this * structure will be passed via an IPv6 raw socket, on which an application * will only receive the payload, i.e., the data after the IPv6 header and * all the extension headers. (See Section 3 of RFC 3542.) */ struct mrt6msg { #define MRT6MSG_NOCACHE 1 #define MRT6MSG_WRONGMIF 2 #define MRT6MSG_WHOLEPKT 3 /* used for user level encap */ u_char im6_mbz; /* must be zero */ u_char im6_msgtype; /* what type of message */ u_int16_t im6_mif; /* mif rec'd on */ u_int32_t im6_pad; /* padding for 64bit arch */ struct in6_addr im6_src, im6_dst; }; /* * Argument structure used by the multicast routing daemon to get src-grp * packet counts. */ struct sioc_sg_req6 { struct sockaddr_in6 src; struct sockaddr_in6 grp; u_quad_t pktcnt; u_quad_t bytecnt; u_quad_t wrong_if; }; /* * Argument structure used by mrouted to get mif pkt counts. */ struct sioc_mif_req6 { mifi_t mifi; /* mif number */ u_quad_t icount; /* Input packet count on mif */ u_quad_t ocount; /* Output packet count on mif */ u_quad_t ibytes; /* Input byte count on mif */ u_quad_t obytes; /* Output byte count on mif */ }; -#ifdef _KERNEL +#if defined(_KERNEL) || defined(KERNEL) /* * The kernel's multicast-interface structure.
*/ struct mif6 { u_char m6_flags; /* MIFF_ flags defined above */ u_int m6_rate_limit; /* max rate */ struct in6_addr m6_lcl_addr; /* local interface address */ struct ifnet *m6_ifp; /* pointer to interface */ u_quad_t m6_pkt_in; /* # pkts in on interface */ u_quad_t m6_pkt_out; /* # pkts out on interface */ u_quad_t m6_bytes_in; /* # bytes in on interface */ u_quad_t m6_bytes_out; /* # bytes out on interface */ #ifdef notyet u_int m6_rsvp_on; /* RSVP listening on this vif */ struct socket *m6_rsvpd; /* RSVP daemon socket */ #endif }; /* * The kernel's multicast forwarding cache entry structure */ struct mf6c { struct sockaddr_in6 mf6c_origin; /* IPv6 origin of mcasts */ struct sockaddr_in6 mf6c_mcastgrp; /* multicast group associated*/ mifi_t mf6c_parent; /* incoming IF */ struct if_set mf6c_ifset; /* set of outgoing IFs */ u_quad_t mf6c_pkt_cnt; /* pkt count for src-grp */ u_quad_t mf6c_byte_cnt; /* byte count for src-grp */ u_quad_t mf6c_wrong_if; /* wrong if for src-grp */ int mf6c_expire; /* time to clean entry up */ struct timeval mf6c_last_assert; /* last time I sent an assert*/ struct rtdetq *mf6c_stall; /* pkts waiting for route */ struct mf6c *mf6c_next; /* hash table linkage */ }; #define MF6C_INCOMPLETE_PARENT ((mifi_t)-1) /* * Argument structure used for pkt info. while upcall is made */ #ifndef _NETINET_IP_MROUTE_H_ struct rtdetq { /* XXX: rtdetq is also defined in ip_mroute.h */ struct mbuf *m; /* A copy of the packet */ struct ifnet *ifp; /* Interface pkt came in on */ #ifdef UPCALL_TIMING struct timeval t; /* Timestamp */ #endif /* UPCALL_TIMING */ struct rtdetq *next; }; #endif /* _NETINET_IP_MROUTE_H_ */ #define MF6CTBLSIZ 256 #if (MF6CTBLSIZ & (MF6CTBLSIZ - 1)) == 0 /* from sys:route.h */ #define MF6CHASHMOD(h) ((h) & (MF6CTBLSIZ - 1)) #else #define MF6CHASHMOD(h) ((h) % MF6CTBLSIZ) #endif #define MAX_UPQ6 4 /* max. no of pkts in upcall Q */ extern int (*ip6_mrouter_set)(struct socket *so, struct sockopt *sopt); extern int (*ip6_mrouter_get)(struct socket *so, struct sockopt *sopt); extern int (*ip6_mrouter_done)(void); extern int (*mrt6_ioctl)(u_long, caddr_t); #endif /* _KERNEL */ #endif /* !_NETINET6_IP6_MROUTE_H_ */
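Once MRT6_INIT is active, the kernel delivers mrt6msg upcalls on the same raw socket, interleaved with ordinary MLD traffic; as the structure comment above explains, the overlay puts im6_mbz where the ICMPv6/MLD type byte sits, so a zero first byte marks an upcall. A hedged reader sketch follows (the buffer size and printf formatting are arbitrary choices):

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet6/ip6_mroute.h>
#include <arpa/inet.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

/* Drain upcalls from the MRT6_INIT'ed raw socket 's'. */
static void
upcall_loop(int s)
{
    char buf[2048], src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
    struct mrt6msg *m = (struct mrt6msg *)buf;
    ssize_t n;

    for (;;) {
        n = read(s, buf, sizeof(buf));
        if (n == -1)
            err(1, "read");
        if ((size_t)n < sizeof(*m) || m->im6_mbz != 0)
            continue;       /* ordinary MLD traffic, not an upcall */
        inet_ntop(AF_INET6, &m->im6_src, src, sizeof(src));
        inet_ntop(AF_INET6, &m->im6_dst, dst, sizeof(dst));
        switch (m->im6_msgtype) {
        case MRT6MSG_NOCACHE:
            /* No MFC entry yet: the daemon should install one. */
            printf("NOCACHE (%s, %s) on mif %u\n", src, dst, m->im6_mif);
            break;
        case MRT6MSG_WRONGMIF:
            printf("WRONGMIF (%s, %s)\n", src, dst);
            break;
        default:
            printf("upcall type %u\n", m->im6_msgtype);
        }
    }
}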