diff --git a/sbin/route/route_netlink.c b/sbin/route/route_netlink.c index 040b97568f20..631c2860b547 100644 --- a/sbin/route/route_netlink.c +++ b/sbin/route/route_netlink.c @@ -1,913 +1,913 @@ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const char *routename(struct sockaddr *); const char *netname(struct sockaddr *); void printb(int, const char *); extern const char routeflags[]; extern int verbose, debugonly; int rtmsg_nl(int cmd, int rtm_flags, int fib, int rtm_addrs, struct sockaddr_storage *so, struct rt_metrics *rt_metrics); int flushroutes_fib_nl(int fib, int af); void monitor_nl(int fib); struct nl_helper; struct snl_msg_info; static void print_getmsg(struct nl_helper *h, struct nlmsghdr *hdr, struct sockaddr *dst); static void print_nlmsg(struct nl_helper *h, struct nlmsghdr *hdr, struct snl_msg_info *cinfo); #define s6_addr32 __u6_addr.__u6_addr32 #define bitcount32(x) __bitcount32((uint32_t)(x)) static int inet6_get_plen(const struct in6_addr *addr) { return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) + bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3])); } static void ip6_writemask(struct in6_addr *addr6, uint8_t mask) { uint32_t *cp; for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) *cp++ = 0xFFFFFFFF; if (mask > 0) *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); } static struct sockaddr * get_netmask(struct snl_state *ss, int family, int plen) { if (family == AF_INET) { if (plen == 32) return (NULL); struct sockaddr_in *sin = snl_allocz(ss, sizeof(*sin)); sin->sin_len = sizeof(*sin); sin->sin_family = family; sin->sin_addr.s_addr = htonl(plen ? 
~((1 << (32 - plen)) - 1) : 0); return (struct sockaddr *)sin; } else if (family == AF_INET6) { if (plen == 128) return (NULL); struct sockaddr_in6 *sin6 = snl_allocz(ss, sizeof(*sin6)); sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = family; ip6_writemask(&sin6->sin6_addr, plen); return (struct sockaddr *)sin6; } return (NULL); } static void nl_init_socket(struct snl_state *ss) { if (snl_init(ss, NETLINK_ROUTE)) return; if (modfind("netlink") == -1 && errno == ENOENT) { /* Try to load */ if (kldload("netlink") == -1) err(1, "netlink is not loaded and load attempt failed"); if (snl_init(ss, NETLINK_ROUTE)) return; } err(1, "unable to open netlink socket"); } struct nl_helper { struct snl_state ss_cmd; }; static void nl_helper_init(struct nl_helper *h) { nl_init_socket(&h->ss_cmd); } static void nl_helper_free(struct nl_helper *h) { snl_free(&h->ss_cmd); } static struct sockaddr * get_addr(struct sockaddr_storage *so, int rtm_addrs, int addr_type) { struct sockaddr *sa = NULL; if (rtm_addrs & (1 << addr_type)) sa = (struct sockaddr *)&so[addr_type]; return (sa); } static int rtmsg_nl_int(struct nl_helper *h, int cmd, int rtm_flags, int fib, int rtm_addrs, struct sockaddr_storage *so, struct rt_metrics *rt_metrics) { struct snl_state *ss = &h->ss_cmd; struct snl_writer nw; int nl_type = 0, nl_flags = 0; snl_init_writer(ss, &nw); switch (cmd) { case RTSOCK_RTM_ADD: nl_type = RTM_NEWROUTE; nl_flags = NLM_F_CREATE | NLM_F_APPEND; /* Do append by default */ break; case RTSOCK_RTM_CHANGE: nl_type = RTM_NEWROUTE; nl_flags = NLM_F_REPLACE; break; case RTSOCK_RTM_DELETE: nl_type = RTM_DELROUTE; break; case RTSOCK_RTM_GET: nl_type = RTM_GETROUTE; break; default: exit(1); } struct sockaddr *dst = get_addr(so, rtm_addrs, RTAX_DST); struct sockaddr *mask = get_addr(so, rtm_addrs, RTAX_NETMASK); struct sockaddr *gw = get_addr(so, rtm_addrs, RTAX_GATEWAY); if (dst == NULL) return (EINVAL); struct nlmsghdr *hdr = snl_create_msg_request(&nw, nl_type); hdr->nlmsg_flags |= 
nl_flags; int plen = 0; int rtm_type = RTN_UNICAST; switch (dst->sa_family) { case AF_INET: { struct sockaddr_in *mask4 = (struct sockaddr_in *)mask; if ((rtm_flags & RTF_HOST) == 0 && mask4 != NULL) plen = bitcount32(mask4->sin_addr.s_addr); else plen = 32; break; } case AF_INET6: { struct sockaddr_in6 *mask6 = (struct sockaddr_in6 *)mask; if ((rtm_flags & RTF_HOST) == 0 && mask6 != NULL) plen = inet6_get_plen(&mask6->sin6_addr); else plen = 128; break; } default: return (ENOTSUP); } if (rtm_flags & RTF_REJECT) rtm_type = RTN_PROHIBIT; else if (rtm_flags & RTF_BLACKHOLE) rtm_type = RTN_BLACKHOLE; struct rtmsg *rtm = snl_reserve_msg_object(&nw, struct rtmsg); rtm->rtm_family = dst->sa_family; rtm->rtm_protocol = RTPROT_STATIC; rtm->rtm_type = rtm_type; rtm->rtm_dst_len = plen; /* Request exact prefix match if mask is set */ if ((cmd == RTSOCK_RTM_GET) && (mask != NULL)) rtm->rtm_flags = RTM_F_PREFIX; snl_add_msg_attr_ip(&nw, RTA_DST, dst); snl_add_msg_attr_u32(&nw, RTA_TABLE, fib); uint32_t rta_oif = 0; if (gw != NULL) { if (rtm_flags & RTF_GATEWAY) { if (gw->sa_family == dst->sa_family) snl_add_msg_attr_ip(&nw, RTA_GATEWAY, gw); else snl_add_msg_attr_ipvia(&nw, RTA_VIA, gw); if (gw->sa_family == AF_INET6) { struct sockaddr_in6 *gw6 = (struct sockaddr_in6 *)gw; if (IN6_IS_ADDR_LINKLOCAL(&gw6->sin6_addr)) rta_oif = gw6->sin6_scope_id; } } else { /* Should be AF_LINK */ struct sockaddr_dl *sdl = (struct sockaddr_dl *)gw; if (sdl->sdl_index != 0) rta_oif = sdl->sdl_index; } } if (dst->sa_family == AF_INET6 && rta_oif == 0) { struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst; if (IN6_IS_ADDR_LINKLOCAL(&dst6->sin6_addr)) rta_oif = dst6->sin6_scope_id; } if (rta_oif != 0) snl_add_msg_attr_u32(&nw, RTA_OIF, rta_oif); if (rtm_flags != 0) snl_add_msg_attr_u32(&nw, NL_RTA_RTFLAGS, rtm_flags); if (rt_metrics->rmx_mtu > 0) { int off = snl_add_msg_attr_nested(&nw, RTA_METRICS); snl_add_msg_attr_u32(&nw, RTAX_MTU, rt_metrics->rmx_mtu); snl_end_attr_nested(&nw, off); } if 
(rt_metrics->rmx_weight > 0) snl_add_msg_attr_u32(&nw, NL_RTA_WEIGHT, rt_metrics->rmx_weight); if ((hdr = snl_finalize_msg(&nw)) && snl_send_message(ss, hdr)) { struct snl_errmsg_data e = {}; hdr = snl_read_reply(ss, hdr->nlmsg_seq); if (nl_type == NL_RTM_GETROUTE) { if (hdr->nlmsg_type == NL_RTM_NEWROUTE) { print_getmsg(h, hdr, dst); return (0); } } if (snl_parse_errmsg(ss, hdr, &e)) { switch (e.error) { case (ESRCH): warnx("route has not been found"); break; default: if (e.error == 0) break; warnc(e.error, "message indicates error"); } return (e.error); } } return (EINVAL); } int rtmsg_nl(int cmd, int rtm_flags, int fib, int rtm_addrs, struct sockaddr_storage *so, struct rt_metrics *rt_metrics) { struct nl_helper h = {}; nl_helper_init(&h); int error = rtmsg_nl_int(&h, cmd, rtm_flags, fib, rtm_addrs, so, rt_metrics); nl_helper_free(&h); return (error); } static void get_ifdata(struct nl_helper *h, uint32_t ifindex, struct snl_parsed_link_simple *link) { struct snl_state *ss = &h->ss_cmd; struct snl_writer nw; snl_init_writer(ss, &nw); struct nlmsghdr *hdr = snl_create_msg_request(&nw, NL_RTM_GETLINK); struct ifinfomsg *ifmsg = snl_reserve_msg_object(&nw, struct ifinfomsg); if (ifmsg != NULL) ifmsg->ifi_index = ifindex; if (! 
(hdr = snl_finalize_msg(&nw)) || !snl_send_message(ss, hdr)) return; hdr = snl_read_reply(ss, hdr->nlmsg_seq); if (hdr != NULL && hdr->nlmsg_type == RTM_NEWLINK) { snl_parse_nlmsg(ss, hdr, &snl_rtm_link_parser_simple, link); } if (link->ifla_ifname == NULL) { char ifname[16]; snprintf(ifname, sizeof(ifname), "if#%u", ifindex); int len = strlen(ifname); char *buf = snl_allocz(ss, len + 1); strlcpy(buf, ifname, len + 1); link->ifla_ifname = buf; } } static void print_getmsg(struct nl_helper *h, struct nlmsghdr *hdr, struct sockaddr *dst) { struct snl_state *ss = &h->ss_cmd; struct timespec ts; struct snl_parsed_route r = { .rtax_weight = RT_DEFAULT_WEIGHT }; if (!snl_parse_nlmsg(ss, hdr, &snl_rtm_route_parser, &r)) return; struct snl_parsed_link_simple link = {}; get_ifdata(h, r.rta_oif, &link); if (r.rtax_mtu == 0) r.rtax_mtu = link.ifla_mtu; r.rta_rtflags |= (RTF_UP | RTF_DONE); (void)printf(" route to: %s\n", routename(dst)); if (r.rta_dst) (void)printf("destination: %s\n", routename(r.rta_dst)); struct sockaddr *mask = get_netmask(ss, r.rtm_family, r.rtm_dst_len); if (mask) (void)printf(" mask: %s\n", routename(mask)); if (r.rta_gw && (r.rta_rtflags & RTF_GATEWAY)) (void)printf(" gateway: %s\n", routename(r.rta_gw)); (void)printf(" fib: %u\n", (unsigned int)r.rta_table); if (link.ifla_ifname) (void)printf(" interface: %s\n", link.ifla_ifname); (void)printf(" flags: "); printb(r.rta_rtflags, routeflags); struct rt_metrics rmx = { .rmx_mtu = r.rtax_mtu, .rmx_weight = r.rtax_weight, .rmx_expire = r.rta_expire, }; printf("\n%9s %9s %9s %9s %9s %10s %9s\n", "recvpipe", "sendpipe", "ssthresh", "rtt,msec", "mtu ", "weight", "expire"); printf("%8lu ", rmx.rmx_recvpipe); printf("%8lu ", rmx.rmx_sendpipe); printf("%8lu ", rmx.rmx_ssthresh); printf("%8lu ", 0UL); printf("%8lu ", rmx.rmx_mtu); printf("%8lu ", rmx.rmx_weight); if (rmx.rmx_expire > 0) clock_gettime(CLOCK_REALTIME_FAST, &ts); else ts.tv_sec = 0; printf("%8ld \n", (long)(rmx.rmx_expire - ts.tv_sec)); } static 
void print_prefix(struct nl_helper *h, char *buf, int bufsize, struct sockaddr *sa, int plen) { int sz = 0; if (sa == NULL) { snprintf(buf, bufsize, ""); return; } switch (sa->sa_family) { case AF_INET: { struct sockaddr_in *sin = (struct sockaddr_in *)sa; char abuf[INET_ADDRSTRLEN]; inet_ntop(AF_INET, &sin->sin_addr, abuf, sizeof(abuf)); sz = snprintf(buf, bufsize, "%s", abuf); break; } case AF_INET6: { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; char abuf[INET6_ADDRSTRLEN]; char *ifname = NULL; inet_ntop(AF_INET6, &sin6->sin6_addr, abuf, sizeof(abuf)); if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { struct snl_parsed_link_simple link = {}; if (sin6->sin6_scope_id != 0) { get_ifdata(h, sin6->sin6_scope_id, &link); ifname = link.ifla_ifname; } } if (ifname == NULL) sz = snprintf(buf, bufsize, "%s", abuf); else sz = snprintf(buf, bufsize, "%s%%%s", abuf, ifname); break; } default: snprintf(buf, bufsize, "unknown_af#%d", sa->sa_family); plen = -1; } if (plen >= 0) snprintf(buf + sz, bufsize - sz, "/%d", plen); } static int print_line_prefix(struct nlmsghdr *hdr, struct snl_msg_info *cinfo, const char *cmd, const char *name) { struct timespec tp; struct tm tm; char buf[32]; clock_gettime(CLOCK_REALTIME, &tp); localtime_r(&tp.tv_sec, &tm); strftime(buf, sizeof(buf), "%T", &tm); int len = printf("%s.%03ld PID %4u %s %s ", buf, tp.tv_nsec / 1000000, cinfo->process_id, cmd, name); return (len); } static const char * get_action_name(struct nlmsghdr *hdr, int new_cmd) { if (hdr->nlmsg_type == new_cmd) { //return ((hdr->nlmsg_flags & NLM_F_REPLACE) ? 
"replace" : "add"); return ("add/repl"); } else return ("delete"); } static void print_nlmsg_route_nhop(struct nl_helper *h, struct snl_parsed_route *r, struct rta_mpath_nh *nh, bool first) { // gw 10.0.0.1 ifp vtnet0 mtu 1500 table inet.0 if (nh->gw != NULL) { char gwbuf[128]; print_prefix(h, gwbuf, sizeof(gwbuf), nh->gw, -1); printf("gw %s ", gwbuf); } if (nh->ifindex != 0) { struct snl_parsed_link_simple link = {}; get_ifdata(h, nh->ifindex, &link); if (nh->rtax_mtu == 0) nh->rtax_mtu = link.ifla_mtu; printf("iface %s ", link.ifla_ifname); if (nh->rtax_mtu != 0) printf("mtu %d ", nh->rtax_mtu); } if (first) { switch (r->rtm_family) { case AF_INET: printf("table inet.%d", r->rta_table); break; case AF_INET6: printf("table inet6.%d", r->rta_table); break; } } printf("\n"); } static void print_nlmsg_route(struct nl_helper *h, struct nlmsghdr *hdr, struct snl_msg_info *cinfo) { struct snl_parsed_route r = { .rtax_weight = RT_DEFAULT_WEIGHT }; struct snl_state *ss = &h->ss_cmd; if (!snl_parse_nlmsg(ss, hdr, &snl_rtm_route_parser, &r)) return; // 20:19:41.333 add route 10.0.0.0/24 gw 10.0.0.1 ifp vtnet0 mtu 1500 table inet.0 const char *cmd = get_action_name(hdr, RTM_NEWROUTE); int len = print_line_prefix(hdr, cinfo, cmd, "route"); char buf[128]; print_prefix(h, buf, sizeof(buf), r.rta_dst, r.rtm_dst_len); len += strlen(buf) + 1; printf("%s ", buf); switch (r.rtm_type) { case RTN_BLACKHOLE: printf("blackhole\n"); return; case RTN_UNREACHABLE: printf("unreach(reject)\n"); return; case RTN_PROHIBIT: printf("prohibit(reject)\n"); return; } if (r.rta_multipath.num_nhops != 0) { bool first = true; memset(buf, ' ', sizeof(buf)); buf[len] = '\0'; for (uint32_t i = 0; i < r.rta_multipath.num_nhops; i++) { struct rta_mpath_nh *nh = r.rta_multipath.nhops[i]; if (!first) printf("%s", buf); print_nlmsg_route_nhop(h, &r, nh, first); first = false; } } else { struct rta_mpath_nh nh = { .gw = r.rta_gw, .ifindex = r.rta_oif, .rtax_mtu = r.rtax_mtu, }; print_nlmsg_route_nhop(h, &r, 
&nh, true); } } static const char *operstate[] = { "UNKNOWN", /* 0, IF_OPER_UNKNOWN */ "NOTPRESENT", /* 1, IF_OPER_NOTPRESENT */ "DOWN", /* 2, IF_OPER_DOWN */ "LLDOWN", /* 3, IF_OPER_LOWERLAYERDOWN */ "TESTING", /* 4, IF_OPER_TESTING */ "DORMANT", /* 5, IF_OPER_DORMANT */ "UP", /* 6, IF_OPER_UP */ }; static void print_nlmsg_link(struct nl_helper *h, struct nlmsghdr *hdr, struct snl_msg_info *cinfo) { struct snl_parsed_link l = {}; struct snl_state *ss = &h->ss_cmd; if (!snl_parse_nlmsg(ss, hdr, &snl_rtm_link_parser, &l)) return; // 20:19:41.333 add iface#3 vtnet0 admin UP oper UP mtu 1500 table inet.0 const char *cmd = get_action_name(hdr, RTM_NEWLINK); print_line_prefix(hdr, cinfo, cmd, "iface"); printf("iface#%u %s ", l.ifi_index, l.ifla_ifname); printf("admin %s ", (l.ifi_flags & IFF_UP) ? "UP" : "DOWN"); - if (l.ifla_operstate < NL_ARRAY_LEN(operstate)) + if (l.ifla_operstate < nitems(operstate)) printf("oper %s ", operstate[l.ifla_operstate]); if (l.ifla_mtu > 0) printf("mtu %u ", l.ifla_mtu); printf("\n"); } static void print_nlmsg_addr(struct nl_helper *h, struct nlmsghdr *hdr, struct snl_msg_info *cinfo) { struct snl_parsed_addr attrs = {}; struct snl_state *ss = &h->ss_cmd; if (!snl_parse_nlmsg(ss, hdr, &snl_rtm_addr_parser, &attrs)) return; // add addr 192.168.1.1/24 iface vtnet0 const char *cmd = get_action_name(hdr, RTM_NEWADDR); print_line_prefix(hdr, cinfo, cmd, "addr"); char buf[128]; struct sockaddr *addr = attrs.ifa_local ? 
attrs.ifa_local : attrs.ifa_address; print_prefix(h, buf, sizeof(buf), addr, attrs.ifa_prefixlen); printf("%s ", buf); struct snl_parsed_link_simple link = {}; get_ifdata(h, attrs.ifa_index, &link); if (link.ifi_flags & IFF_POINTOPOINT) { char buf[64]; print_prefix(h, buf, sizeof(buf), attrs.ifa_address, -1); printf("-> %s ", buf); } printf("iface %s ", link.ifla_ifname); printf("\n"); } static const char *nudstate[] = { "INCOMPLETE", /* 0x01(0) */ "REACHABLE", /* 0x02(1) */ "STALE", /* 0x04(2) */ "DELAY", /* 0x08(3) */ "PROBE", /* 0x10(4) */ "FAILED", /* 0x20(5) */ }; #define NUD_INCOMPLETE 0x01 /* No lladdr, address resolution in progress */ #define NUD_REACHABLE 0x02 /* reachable & recently resolved */ #define NUD_STALE 0x04 /* has lladdr but it's stale */ #define NUD_DELAY 0x08 /* has lladdr, is stale, probes delayed */ #define NUD_PROBE 0x10 /* has lladdr, is stale, probes sent */ #define NUD_FAILED 0x20 /* unused */ static void print_nlmsg_neigh(struct nl_helper *h, struct nlmsghdr *hdr, struct snl_msg_info *cinfo) { struct snl_parsed_neigh attrs = {}; struct snl_state *ss = &h->ss_cmd; if (!snl_parse_nlmsg(ss, hdr, &snl_rtm_neigh_parser, &attrs)) return; // add addr 192.168.1.1 state %s lladdr %s iface vtnet0 const char *cmd = get_action_name(hdr, RTM_NEWNEIGH); print_line_prefix(hdr, cinfo, cmd, "neigh"); char buf[128]; print_prefix(h, buf, sizeof(buf), attrs.nda_dst, -1); printf("%s ", buf); struct snl_parsed_link_simple link = {}; get_ifdata(h, attrs.nda_ifindex, &link); - for (unsigned int i = 0; i < NL_ARRAY_LEN(nudstate); i++) { + for (unsigned int i = 0; i < nitems(nudstate); i++) { if ((1 << i) & attrs.ndm_state) { printf("state %s ", nudstate[i]); break; } } if (attrs.nda_lladdr != NULL) { int if_type = link.ifi_type; if ((if_type == IFT_ETHER || if_type == IFT_L2VLAN || if_type == IFT_BRIDGE) && NLA_DATA_LEN(attrs.nda_lladdr) == ETHER_ADDR_LEN) { struct ether_addr *ll; ll = (struct ether_addr *)NLA_DATA(attrs.nda_lladdr); printf("lladdr %s ", 
ether_ntoa(ll)); } else { struct sockaddr_dl sdl = { .sdl_len = sizeof(sdl), .sdl_family = AF_LINK, .sdl_index = attrs.nda_ifindex, .sdl_type = if_type, .sdl_alen = NLA_DATA_LEN(attrs.nda_lladdr), }; if (sdl.sdl_alen < sizeof(sdl.sdl_data)) { void *ll = NLA_DATA(attrs.nda_lladdr); memcpy(sdl.sdl_data, ll, sdl.sdl_alen); printf("lladdr %s ", link_ntoa(&sdl)); } } } if (link.ifla_ifname != NULL) printf("iface %s ", link.ifla_ifname); printf("\n"); } static void print_nlmsg_generic(struct nl_helper *h, struct nlmsghdr *hdr, struct snl_msg_info *cinfo) { const char *cmd = get_action_name(hdr, 0); print_line_prefix(hdr, cinfo, cmd, "unknown message"); printf(" type %u\n", hdr->nlmsg_type); } static void print_nlmsg(struct nl_helper *h, struct nlmsghdr *hdr, struct snl_msg_info *cinfo) { switch (hdr->nlmsg_type) { case RTM_NEWLINK: case RTM_DELLINK: print_nlmsg_link(h, hdr, cinfo); break; case RTM_NEWADDR: case RTM_DELADDR: print_nlmsg_addr(h, hdr, cinfo); break; case RTM_NEWROUTE: case RTM_DELROUTE: print_nlmsg_route(h, hdr, cinfo); break; case RTM_NEWNEIGH: case RTM_DELNEIGH: print_nlmsg_neigh(h, hdr, cinfo); break; default: print_nlmsg_generic(h, hdr, cinfo); } snl_clear_lb(&h->ss_cmd); } void monitor_nl(int fib) { struct snl_state ss_event = {}; struct nl_helper h; nl_init_socket(&ss_event); nl_helper_init(&h); int groups[] = { RTNLGRP_LINK, RTNLGRP_NEIGH, RTNLGRP_NEXTHOP, #ifdef INET RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV4_ROUTE, #endif #ifdef INET6 RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_ROUTE, #endif }; int optval = 1; socklen_t optlen = sizeof(optval); setsockopt(ss_event.fd, SOL_NETLINK, NETLINK_MSG_INFO, &optval, optlen); - for (unsigned int i = 0; i < NL_ARRAY_LEN(groups); i++) { + for (unsigned int i = 0; i < nitems(groups); i++) { int error; int optval = groups[i]; socklen_t optlen = sizeof(optval); error = setsockopt(ss_event.fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &optval, optlen); if (error != 0) warn("Unable to subscribe to group %d", optval); } struct snl_msg_info 
attrs = {}; struct nlmsghdr *hdr; while ((hdr = snl_read_message_dbg(&ss_event, &attrs)) != NULL) { print_nlmsg(&h, hdr, &attrs); snl_clear_lb(&h.ss_cmd); snl_clear_lb(&ss_event); } snl_free(&ss_event); nl_helper_free(&h); exit(0); } static void print_flushed_route(struct snl_parsed_route *r, struct sockaddr *gw) { struct sockaddr *sa = r->rta_dst; printf("%-20.20s ", r->rta_rtflags & RTF_HOST ? routename(sa) : netname(sa)); sa = gw; printf("%-20.20s ", routename(sa)); printf("-fib %-3d ", r->rta_table); printf("done\n"); } static int flushroute_one(struct nl_helper *h, struct snl_parsed_route *r) { struct snl_state *ss = &h->ss_cmd; struct snl_errmsg_data e = {}; struct snl_writer nw; snl_init_writer(ss, &nw); struct nlmsghdr *hdr = snl_create_msg_request(&nw, NL_RTM_DELROUTE); struct rtmsg *rtm = snl_reserve_msg_object(&nw, struct rtmsg); rtm->rtm_family = r->rtm_family; rtm->rtm_dst_len = r->rtm_dst_len; snl_add_msg_attr_u32(&nw, RTA_TABLE, r->rta_table); snl_add_msg_attr_ip(&nw, RTA_DST, r->rta_dst); if (! 
(hdr = snl_finalize_msg(&nw)) || !snl_send_message(ss, hdr)) return (ENOMEM); if (!snl_read_reply_code(ss, hdr->nlmsg_seq, &e)) { return (e.error); if (e.error == EPERM) errc(1, e.error, "RTM_DELROUTE failed:"); else warnc(e.error, "RTM_DELROUTE failed:"); return (true); }; if (verbose) { struct snl_msg_info attrs = {}; print_nlmsg(h, hdr, &attrs); } else { if (r->rta_multipath.num_nhops != 0) { for (uint32_t i = 0; i < r->rta_multipath.num_nhops; i++) { struct rta_mpath_nh *nh = r->rta_multipath.nhops[i]; print_flushed_route(r, nh->gw); } } else print_flushed_route(r, r->rta_gw); } return (0); } int flushroutes_fib_nl(int fib, int af) { struct snl_state ss = {}; struct snl_writer nw; struct nl_helper h = {}; nl_init_socket(&ss); snl_init_writer(&ss, &nw); struct nlmsghdr *hdr = snl_create_msg_request(&nw, NL_RTM_GETROUTE); hdr->nlmsg_flags |= NLM_F_DUMP; struct rtmsg *rtm = snl_reserve_msg_object(&nw, struct rtmsg); rtm->rtm_family = af; snl_add_msg_attr_u32(&nw, RTA_TABLE, fib); if (! 
(hdr = snl_finalize_msg(&nw)) || !snl_send_message(&ss, hdr)) { snl_free(&ss); return (EINVAL); } struct snl_errmsg_data e = {}; uint32_t nlm_seq = hdr->nlmsg_seq; nl_helper_init(&h); while ((hdr = snl_read_reply_multi(&ss, nlm_seq, &e)) != NULL) { struct snl_parsed_route r = { .rtax_weight = RT_DEFAULT_WEIGHT }; int error; if (!snl_parse_nlmsg(&ss, hdr, &snl_rtm_route_parser, &r)) continue; if (verbose) { struct snl_msg_info attrs = {}; print_nlmsg(&h, hdr, &attrs); } if (r.rta_table != (uint32_t)fib || r.rtm_family != af) continue; if ((r.rta_rtflags & RTF_GATEWAY) == 0) continue; if (debugonly) continue; if ((error = flushroute_one(&h, &r)) != 0) { if (error == EPERM) errc(1, error, "RTM_DELROUTE failed:"); else warnc(error, "RTM_DELROUTE failed:"); } snl_clear_lb(&h.ss_cmd); } snl_free(&ss); nl_helper_free(&h); return (e.error); } diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index ab001d346313..133dba45cf36 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -1,3110 +1,3110 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Michael Shalayeff. * Copyright (c) 2003 Ryan McBride. * Copyright (c) 2011 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "opt_bpf.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #include #endif #ifdef INET #include #include #endif #ifdef INET6 #include #include #include #include #include #include #endif #include #include #include #include #include static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); struct carp_softc { struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ struct ifaddr **sc_ifas; /* Our ifaddrs. */ carp_version_t sc_version; /* carp or VRRPv3 */ uint8_t sc_addr[ETHER_ADDR_LEN]; /* Our link level address. */ struct callout sc_ad_tmo; /* Advertising timeout. */ #ifdef INET struct callout sc_md_tmo; /* Master down timeout. */ #endif #ifdef INET6 struct callout sc_md6_tmo; /* XXX: Master down timeout. 
*/ #endif struct mtx sc_mtx; int sc_vhid; union { struct { /* sc_version == CARP_VERSION_CARP */ int sc_advskew; int sc_advbase; struct in_addr sc_carpaddr; struct in6_addr sc_carpaddr6; uint64_t sc_counter; bool sc_init_counter; #define CARP_HMAC_PAD 64 unsigned char sc_key[CARP_KEY_LEN]; unsigned char sc_pad[CARP_HMAC_PAD]; SHA1_CTX sc_sha1; }; struct { /* sc_version == CARP_VERSION_VRRPv3 */ uint8_t sc_vrrp_prio; uint16_t sc_vrrp_adv_inter; uint16_t sc_vrrp_master_inter; }; }; int sc_naddrs; int sc_naddrs6; int sc_ifasiz; enum { INIT = 0, BACKUP, MASTER } sc_state; int sc_suppress; int sc_sendad_errors; #define CARP_SENDAD_MAX_ERRORS 3 int sc_sendad_success; #define CARP_SENDAD_MIN_SUCCESS 3 TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ }; struct carp_if { #ifdef INET int cif_naddrs; #endif #ifdef INET6 int cif_naddrs6; #endif TAILQ_HEAD(, carp_softc) cif_vrs; #ifdef INET struct ip_moptions cif_imo; #endif #ifdef INET6 struct ip6_moptions cif_im6o; #endif struct ifnet *cif_ifp; struct mtx cif_mtx; uint32_t cif_flags; #define CIF_PROMISC 0x00000001 }; /* Kernel equivalent of struct carpreq, but with more fields for new features. * */ struct carpkreq { int carpr_count; int carpr_vhid; int carpr_state; int carpr_advskew; int carpr_advbase; unsigned char carpr_key[CARP_KEY_LEN]; /* Everything above this is identical to carpreq */ struct in_addr carpr_addr; struct in6_addr carpr_addr6; carp_version_t carpr_version; uint8_t carpr_vrrp_priority; uint16_t carpr_vrrp_adv_inter; }; /* * Brief design of carp(4). * * Any carp-capable ifnet may have a list of carp softcs hanging off * its ifp->if_carp pointer. Each softc represents one unique virtual * host id, or vhid. The softc has a back pointer to the ifnet. All * softcs are joined in a global list, which has quite limited use. * * Any interface address that takes part in CARP negotiation has a * pointer to the softc of its vhid, ifa->ifa_carp. 
That could be either * AF_INET or AF_INET6 address. * * Although, one can get the softc's backpointer to ifnet and traverse * through its ifp->if_addrhead queue to find all interface addresses * involved in CARP, we keep a growable array of ifaddr pointers. This * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that * do calls into the network stack, thus avoiding LORs. * * Locking: * * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), * callout-driven events and ioctl()s. * * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. * To traverse the global list we use the mutex carp_mtx. * * Known issues with locking: * * - Sending ad, we put the pointer to the softc in an mtag, and no reference * counting is done on the softc. * - On module unload we may race (?) with packet processing thread * dereferencing our function pointers. */ /* Accept incoming CARP packets. */ VNET_DEFINE_STATIC(int, carp_allow) = 1; #define V_carp_allow VNET(carp_allow) /* Set DSCP in outgoing CARP packets. */ VNET_DEFINE_STATIC(int, carp_dscp) = 56; #define V_carp_dscp VNET(carp_dscp) /* Preempt slower nodes. */ VNET_DEFINE_STATIC(int, carp_preempt) = 0; #define V_carp_preempt VNET(carp_preempt) /* Log level. */ VNET_DEFINE_STATIC(int, carp_log) = 1; #define V_carp_log VNET(carp_log) /* Global advskew demotion. */ VNET_DEFINE_STATIC(int, carp_demotion) = 0; #define V_carp_demotion VNET(carp_demotion) /* Send error demotion factor. */ VNET_DEFINE_STATIC(int, carp_senderr_adj) = CARP_MAXSKEW; #define V_carp_senderr_adj VNET(carp_senderr_adj) /* Iface down demotion factor. 
*/ VNET_DEFINE_STATIC(int, carp_ifdown_adj) = CARP_MAXSKEW; #define V_carp_ifdown_adj VNET(carp_ifdown_adj) static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "CARP"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, &VNET_NAME(carp_allow), 0, carp_allow_sysctl, "I", "Accept incoming CARP packets"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, carp_dscp_sysctl, "I", "DSCP value for carp packets"); SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_log), 0, "CARP log level"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, carp_demote_adj_sysctl, "I", "Adjust demotion factor (skew of advskew)"); SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_ifdown_adj), 0, "Interface down demotion factor adjustment"); VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); VNET_PCPUSTAT_SYSINIT(carpstats); VNET_PCPUSTAT_SYSUNINIT(carpstats); #define CARPSTATS_ADD(name, val) \ counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ sizeof(uint64_t)], (val)) #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", 
\ NULL, MTX_DEF) #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ NULL, MTX_DEF) #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) #define CIF_FREE(cif) do { \ CIF_LOCK(cif); \ if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ carp_free_if(cif); \ else \ CIF_UNLOCK(cif); \ } while (0) #define CARP_LOG(...) do { \ if (V_carp_log > 0) \ log(LOG_INFO, "carp: " __VA_ARGS__); \ } while (0) #define CARP_DEBUG(...) do { \ if (V_carp_log > 1) \ log(LOG_DEBUG, __VA_ARGS__); \ } while (0) #define IFNET_FOREACH_IFA(ifp, ifa) \ CK_STAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ if ((ifa)->ifa_carp != NULL) #define CARP_FOREACH_IFA(sc, ifa) \ CARP_LOCK_ASSERT(sc); \ for (int _i = 0; \ _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ ((ifa) = sc->sc_ifas[_i]) != NULL; \ ++_i) #define IFNET_FOREACH_CARP(ifp, sc) \ KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) #define DEMOTE_ADVSKEW(sc) \ (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ CARP_MAXSKEW : \ (((sc)->sc_advskew + V_carp_demotion < 0) ? 
\ 0 : ((sc)->sc_advskew + V_carp_demotion)))

/* Prototypes for file-local (static) routines. */
static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t, int);
static void vrrp_input_c(struct mbuf *, int, sa_family_t, int, int, uint16_t);
static struct carp_softc *carp_alloc(struct ifnet *, carp_version_t, int);
static void carp_destroy(struct carp_softc *);
static struct carp_if *carp_alloc_if(struct ifnet *);
static void carp_free_if(struct carp_if *);
static void carp_set_state(struct carp_softc *, int, const char* reason);
static void carp_sc_state(struct carp_softc *);
static void carp_setrun(struct carp_softc *, sa_family_t);
static void carp_master_down(void *);
static void carp_master_down_locked(struct carp_softc *, const char* reason);
static void carp_send_ad_locked(struct carp_softc *);
static void vrrp_send_ad_locked(struct carp_softc *);
static void carp_addroute(struct carp_softc *);
static void carp_ifa_addroute(struct ifaddr *);
static void carp_delroute(struct carp_softc *);
static void carp_ifa_delroute(struct ifaddr *);
static void carp_send_ad_all(void *, int);
static void carp_demote_adj(int, char *);

/* Global list of softcs; carp_mtx is held while it is walked or modified. */
static LIST_HEAD(, carp_softc) carp_list;
static struct mtx carp_mtx;
static struct sx carp_sx;
/* Task whose handler is carp_send_ad_all(). */
static struct task carp_sendall_task =
    TASK_INITIALIZER(0, carp_send_ad_all, NULL);

/*
 * Check whether an interface type is acceptable.
 *
 * Returns ENXIO for a NULL ifp, 0 for IFT_ETHER, IFT_L2VLAN and IFT_BRIDGE,
 * and EOPNOTSUPP for every other interface type.
 */
static int
carp_is_supported_if(if_t ifp)
{
	if (ifp == NULL)
		return (ENXIO);

	switch (ifp->if_type) {
	case IFT_ETHER:
	case IFT_L2VLAN:
	case IFT_BRIDGE:
		break;
	default:
		return (EOPNOTSUPP);
	}

	return (0);
}

/*
 * Precompute the fixed part of the HMAC-SHA1 inner hash for a CARP softc.
 *
 * The state saved in sc->sc_sha1 covers, in order: the key pad (key XOR
 * 0x36), the version, the type, the low 8 bits of the VHID, every IPv4
 * address of the softc and then every IPv6 address of the softc.  On
 * return sc->sc_pad holds the outer pad (key XOR 0x5c).
 *
 * Must be called with the softc locked (asserted) on a CARP-version softc.
 */
static void
carp_hmac_prepare(struct carp_softc *sc)
{
	uint8_t version = CARP_VERSION_CARP, type = CARP_ADVERTISEMENT;
	uint8_t vhid = sc->sc_vhid & 0xff;
	struct ifaddr *ifa;
	int i, found;
#ifdef INET
	struct in_addr last, cur, in;
#endif
#ifdef INET6
	struct in6_addr last6, cur6, in6;
#endif

	CARP_LOCK_ASSERT(sc);
	MPASS(sc->sc_version == CARP_VERSION_CARP);

	/* Compute ipad from key. */
	bzero(sc->sc_pad, sizeof(sc->sc_pad));
	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
	for (i = 0; i < sizeof(sc->sc_pad); i++)
		sc->sc_pad[i] ^= 0x36;

	/* Precompute first part of inner hash. */
	SHA1Init(&sc->sc_sha1);
	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
	SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
#ifdef INET
	/*
	 * Each pass picks the smallest address (compared in host byte order)
	 * that is strictly greater than the one hashed on the previous pass,
	 * so addresses are hashed in ascending order and each value once.
	 */
	cur.s_addr = 0;
	do {
		found = 0;
		last = cur;
		cur.s_addr = 0xffffffff;
		CARP_FOREACH_IFA(sc, ifa) {
			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
			if (ifa->ifa_addr->sa_family == AF_INET &&
			    ntohl(in.s_addr) > ntohl(last.s_addr) &&
			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
				cur.s_addr = in.s_addr;
				found++;
			}
		}
		if (found)
			SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
	} while (found);
#endif /* INET */
#ifdef INET6
	/*
	 * Same selection as above, ordered by memcmp() over the address
	 * bytes; the second 16-bit word is cleared first for addresses
	 * matching IN6_IS_SCOPE_EMBED().
	 */
	memset(&cur6, 0, sizeof(cur6));
	do {
		found = 0;
		last6 = cur6;
		memset(&cur6, 0xff, sizeof(cur6));
		CARP_FOREACH_IFA(sc, ifa) {
			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
			if (IN6_IS_SCOPE_EMBED(&in6))
				in6.s6_addr16[1] = 0;
			if (ifa->ifa_addr->sa_family == AF_INET6 &&
			    memcmp(&in6, &last6, sizeof(in6)) > 0 &&
			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
				cur6 = in6;
				found++;
			}
		}
		if (found)
			SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
	} while (found);
#endif /* INET6 */

	/* convert ipad to opad */
	for (i = 0; i < sizeof(sc->sc_pad); i++)
		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
}

/*
 * Compute the HMAC for an advertisement counter.
 *
 * Starts from a copy of the precomputed inner-hash state in sc->sc_sha1,
 * appends the counter, then hashes sc->sc_pad followed by that digest.
 * The 20-byte result is written to md.  Requires the softc lock.
 */
static void
carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
	SHA1_CTX sha1ctx;

	CARP_LOCK_ASSERT(sc);

	/* fetch first half of inner hash */
	bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));

	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
	SHA1Final(md, &sha1ctx);

	/* outer hash */
	SHA1Init(&sha1ctx);
	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
	SHA1Update(&sha1ctx, md, 20);
	SHA1Final(md, &sha1ctx);
}

/*
 * Recompute the HMAC for counter and compare it with md.
 *
 * Returns the bcmp() result: 0 when the digests match, non-zero otherwise.
 */
static int
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
	unsigned char md2[20];

	CARP_LOCK_ASSERT(sc);

	carp_hmac_generate(sc, counter, md2);

	return (bcmp(md, md2, sizeof(md2)));
}

/*
 * Check a VRRPv3 checksum over len bytes starting at offset off in m.
 *
 * Returns 0 when the computed value equals phdrcksum; callers treat any
 * non-zero result as a checksum failure.
 */
static int
vrrp_checksum_verify(struct mbuf *m, int off, int len, uint16_t phdrcksum)
{
	uint16_t cksum;

	/*
	 * Note that VRRPv3 checksums are different from CARP checksums.
	 * Carp just calculates the checksum over the packet.
	 * VRRPv3 includes the pseudo-header checksum as well.
	 */
	cksum = in_cksum_skip(m, off + len, off);
	cksum -= phdrcksum;

	return (cksum);
}

/*
 * process input packet.
 * we have rearranged checks order compared to the rfc,
 * but it seems more efficient this way or not possible otherwise.
 */
#ifdef INET
/*
 * IPv4 input routine.  *offp is the offset of the CARP/VRRP header in the
 * mbuf.  The mbuf is always consumed (freed here or handed to
 * carp_input_c()/vrrp_input_c()), *mp is set to NULL and IPPROTO_DONE is
 * returned on every path.
 */
static int
carp_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct ip *ip;
	struct vrrpv3_header *vh;
	int iplen;
	int minlen;
	int totlen;

	iplen = *offp;
	*mp = NULL;

	CARPSTATS_INC(carps_ipackets);

	if (!V_carp_allow) {
		m_freem(m);
		return (IPPROTO_DONE);
	}

	/* Ensure we have enough header to figure out the version. */
	if (m->m_pkthdr.len < iplen + sizeof(*vh)) {
		CARPSTATS_INC(carps_badlen);
		CARP_DEBUG("%s: received len %zd < sizeof(struct vrrpv3_header) "
		    "on %s\n", __func__, m->m_len - sizeof(struct ip),
		    if_name(m->m_pkthdr.rcvif));
		m_freem(m);
		return (IPPROTO_DONE);
	}

	if (m->m_len < iplen + sizeof(*vh)) {
		if ((m = m_pullup(m, iplen + sizeof(*vh))) == NULL) {
			CARPSTATS_INC(carps_hdrops);
			CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__);
			return (IPPROTO_DONE);
		}
	}
	ip = mtod(m, struct ip *);
	totlen = ntohs(ip->ip_len);
	vh = (struct vrrpv3_header *)((char *)ip + iplen);

	/* The version field selects the minimum header size to require. */
	switch (vh->vrrp_version) {
	case CARP_VERSION_CARP:
		minlen = sizeof(struct carp_header);
		break;
	case CARP_VERSION_VRRPv3:
		minlen = sizeof(struct vrrpv3_header);
		break;
	default:
		CARPSTATS_INC(carps_badver);
		CARP_DEBUG("%s: unsupported version %d on %s\n", __func__,
		    vh->vrrp_version, if_name(m->m_pkthdr.rcvif));
		m_freem(m);
		return (IPPROTO_DONE);
	}

	/* And now check the length again but with the real minimal length. */
	if (m->m_pkthdr.len < iplen + minlen) {
		CARPSTATS_INC(carps_badlen);
		CARP_DEBUG("%s: received len %zd < %d "
		    "on %s\n", __func__, m->m_len - sizeof(struct ip),
		    iplen + minlen,
		    if_name(m->m_pkthdr.rcvif));
		m_freem(m);
		return (IPPROTO_DONE);
	}

	if (m->m_len < iplen + minlen) {
		if ((m = m_pullup(m, iplen + minlen)) == NULL) {
			CARPSTATS_INC(carps_hdrops);
			CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__);
			return (IPPROTO_DONE);
		}
		/* m_pullup() may return a different mbuf; re-derive pointers. */
		ip = mtod(m, struct ip *);
		vh = (struct vrrpv3_header *)((char *)ip + iplen);
	}

	switch (vh->vrrp_version) {
	case CARP_VERSION_CARP: {
		struct carp_header *ch;

		/* verify the CARP checksum */
		if (in_cksum_skip(m, totlen, iplen)) {
			CARPSTATS_INC(carps_badsum);
			CARP_DEBUG("%s: checksum failed on %s\n", __func__,
			    if_name(m->m_pkthdr.rcvif));
			m_freem(m);
			break;
		}
		ch = (struct carp_header *)((char *)ip + iplen);
		carp_input_c(m, ch, AF_INET, ip->ip_ttl);
		break;
	}
	case CARP_VERSION_VRRPv3: {
		uint16_t phdrcksum;

		/* The checksum itself is verified later, in vrrp_input_c(). */
		phdrcksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htonl((u_short)(totlen - iplen) + ip->ip_p));
		vrrp_input_c(m, iplen, AF_INET, ip->ip_ttl, totlen - iplen,
		    phdrcksum);
		break;
	}
	default:
		KASSERT(false, ("Unsupported version %d", vh->vrrp_version));
	}

	return (IPPROTO_DONE);
}
#endif

#ifdef INET6
/*
 * IPv6 input routine.  *offp is the offset of the CARP/VRRP header in the
 * mbuf.  The mbuf is always consumed and IPPROTO_DONE is returned on every
 * path.  Packets received on an interface without if_carp are dropped.
 */
static int
carp6_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct vrrpv3_header *vh;
	u_int len, minlen;

	CARPSTATS_INC(carps_ipackets6);

	if (!V_carp_allow) {
		m_freem(m);
		return (IPPROTO_DONE);
	}

	/* check if received on a valid carp interface */
	if (m->m_pkthdr.rcvif->if_carp == NULL) {
		CARPSTATS_INC(carps_badif);
		CARP_DEBUG("%s: packet received on non-carp interface: %s\n",
		    __func__, if_name(m->m_pkthdr.rcvif));
		m_freem(m);
		return (IPPROTO_DONE);
	}

	if (m->m_len < *offp + sizeof(*vh)) {
		len = m->m_len;
		m = m_pullup(m, *offp + sizeof(*vh));
		if (m == NULL) {
			CARPSTATS_INC(carps_badlen);
			CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
			return (IPPROTO_DONE);
		}
		ip6 = mtod(m, struct ip6_hdr *);
	}
	vh = (struct vrrpv3_header *)(mtod(m, char *) + *offp);

	/* The version field selects the minimum header size to require. */
	switch (vh->vrrp_version) {
	case CARP_VERSION_CARP:
		minlen = sizeof(struct carp_header);
		break;
	case CARP_VERSION_VRRPv3:
		minlen = sizeof(struct vrrpv3_header);
		break;
	default:
		CARPSTATS_INC(carps_badver);
		CARP_DEBUG("%s: unsupported version %d on %s\n", __func__,
		    vh->vrrp_version, if_name(m->m_pkthdr.rcvif));
		m_freem(m);
		return (IPPROTO_DONE);
	}

	/*
	 * And now check the length again but with the real minimal length.
	 * NOTE(review): the checks below use sizeof(*ip6) where the first
	 * check used *offp; presumably the two are equal for the packets
	 * handled here -- confirm.
	 */
	if (m->m_pkthdr.len < sizeof(*ip6) + minlen) {
		CARPSTATS_INC(carps_badlen);
		CARP_DEBUG("%s: received len %zd < %zd "
		    "on %s\n", __func__, m->m_len - sizeof(struct ip),
		    sizeof(*ip6) + minlen,
		    if_name(m->m_pkthdr.rcvif));
		m_freem(m);
		return (IPPROTO_DONE);
	}

	if (m->m_len < sizeof(*ip6) + minlen) {
		if ((m = m_pullup(m, sizeof(*ip6) + minlen)) == NULL) {
			CARPSTATS_INC(carps_hdrops);
			CARP_DEBUG("%s():%d: pullup failed\n", __func__, __LINE__);
			return (IPPROTO_DONE);
		}
		/* m_pullup() may return a different mbuf; re-derive pointers. */
		ip6 = mtod(m, struct ip6_hdr *);
		vh = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6));
	}

	switch (vh->vrrp_version) {
	case CARP_VERSION_CARP: {
		struct carp_header *ch;

		/* verify the CARP checksum */
		if (in_cksum_skip(m, *offp + sizeof(struct carp_header),
		    *offp)) {
			CARPSTATS_INC(carps_badsum);
			CARP_DEBUG("%s: checksum failed, on %s\n", __func__,
			    if_name(m->m_pkthdr.rcvif));
			m_freem(m);
			break;
		}
		ch = (struct carp_header *)((char *)ip6 + sizeof(*ip6));
		carp_input_c(m, ch, AF_INET6, ip6->ip6_hlim);
		break;
	}
	case CARP_VERSION_VRRPv3: {
		uint16_t phdrcksum;

		/* The checksum itself is verified later, in vrrp_input_c(). */
		phdrcksum = in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
		    ip6->ip6_nxt, 0);
		vrrp_input_c(m, sizeof(*ip6), AF_INET6, ip6->ip6_hlim,
		    ntohs(ip6->ip6_plen), phdrcksum);
		break;
	}
	default:
		KASSERT(false, ("Unsupported version %d", vh->vrrp_version));
	}
	return (IPPROTO_DONE);
}
#endif /* INET6 */

/*
 * This routine should not be necessary at all, but some switches
 * (VMWare ESX vswitches) can echo our own packets back at us,
 * and we must ignore them or
they will cause us to drop out of * MASTER mode. * * We cannot catch all cases of network loops. Instead, what we * do here is catch any packet that arrives with a carp header * with a VHID of 0, that comes from an address that is our own. * These packets are by definition "from us" (even if they are from * a misconfigured host that is pretending to be us). * * The VHID test is outside this mini-function. */ static int carp_source_is_self(const struct mbuf *m, struct ifaddr *ifa, sa_family_t af) { #ifdef INET struct ip *ip4; struct in_addr in4; #endif #ifdef INET6 struct ip6_hdr *ip6; struct in6_addr in6; #endif switch (af) { #ifdef INET case AF_INET: ip4 = mtod(m, struct ip *); in4 = ifatoia(ifa)->ia_addr.sin_addr; return (in4.s_addr == ip4->ip_src.s_addr); #endif #ifdef INET6 case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); in6 = ifatoia6(ifa)->ia_addr.sin6_addr; return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0); #endif default: break; } return (0); } static struct ifaddr * carp_find_ifa(const struct mbuf *m, sa_family_t af, uint8_t vhid) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa, *match; int error; NET_EPOCH_ASSERT(); /* * Verify that the VHID is valid on the receiving interface. * * There should be just one match. If there are none * the VHID is not valid and we drop the packet. If * there are multiple VHID matches, take just the first * one, for compatibility with previous code. While we're * scanning, check for obvious loops in the network topology * (these should never happen, and as noted above, we may * miss real loops; this is just a double-check). */ error = 0; match = NULL; IFNET_FOREACH_IFA(ifp, ifa) { if (match == NULL && ifa->ifa_carp != NULL && ifa->ifa_addr->sa_family == af && ifa->ifa_carp->sc_vhid == vhid) match = ifa; if (vhid == 0 && carp_source_is_self(m, ifa, af)) error = ELOOP; } ifa = error ? 
NULL : match; if (ifa != NULL) ifa_ref(ifa); if (ifa == NULL) { if (error == ELOOP) { CARP_DEBUG("dropping looped packet on interface %s\n", if_name(ifp)); CARPSTATS_INC(carps_badif); /* ??? */ } else { CARPSTATS_INC(carps_badvhid); } } return (ifa); } static void carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af, int ttl) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa; struct carp_softc *sc; uint64_t tmp_counter; struct timeval sc_tv, ch_tv; bool multicast = false; NET_EPOCH_ASSERT(); MPASS(ch->carp_version == CARP_VERSION_CARP); ifa = carp_find_ifa(m, af, ch->carp_vhid); if (ifa == NULL) { m_freem(m); return; } sc = ifa->ifa_carp; CARP_LOCK(sc); /* verify the CARP version. */ if (sc->sc_version != CARP_VERSION_CARP) { CARP_UNLOCK(sc); CARPSTATS_INC(carps_badver); CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), ch->carp_version); ifa_free(ifa); m_freem(m); return; } if (ifa->ifa_addr->sa_family == AF_INET) { multicast = IN_MULTICAST(sc->sc_carpaddr.s_addr); } else { multicast = IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6); } ifa_free(ifa); /* verify that the IP TTL is 255, but only if we're not in unicast mode. 
*/ if (multicast && ttl != CARP_DFLTTL) { CARPSTATS_INC(carps_badttl); CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, ttl, if_name(m->m_pkthdr.rcvif)); goto out; } if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { CARPSTATS_INC(carps_badauth); CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, sc->sc_vhid, if_name(ifp)); goto out; } tmp_counter = ntohl(ch->carp_counter[0]); tmp_counter = tmp_counter<<32; tmp_counter += ntohl(ch->carp_counter[1]); /* XXX Replay protection goes here */ sc->sc_init_counter = false; sc->sc_counter = tmp_counter; sc_tv.tv_sec = sc->sc_advbase; sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; ch_tv.tv_sec = ch->carp_advbase; ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; switch (sc->sc_state) { case INIT: break; case MASTER: /* * If we receive an advertisement from a master who's going to * be more frequent than us, go into BACKUP state. */ if (timevalcmp(&sc_tv, &ch_tv, >) || timevalcmp(&sc_tv, &ch_tv, ==)) { callout_stop(&sc->sc_ad_tmo); carp_set_state(sc, BACKUP, "more frequent advertisement received"); carp_setrun(sc, 0); carp_delroute(sc); } break; case BACKUP: /* * If we're pre-empting masters who advertise slower than us, * and this one claims to be slower, treat him as down. */ if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { carp_master_down_locked(sc, "preempting a slower master"); break; } /* * If the master is going to advertise at such a low frequency * that he's guaranteed to time out, we'd might as well just * treat him as timed out now. */ sc_tv.tv_sec = sc->sc_advbase * 3; if (timevalcmp(&sc_tv, &ch_tv, <)) { carp_master_down_locked(sc, "master will time out"); break; } /* * Otherwise, we reset the counter and wait for the next * advertisement. 
*/ carp_setrun(sc, af); break; } out: CARP_UNLOCK(sc); m_freem(m); } static void vrrp_input_c(struct mbuf *m, int off, sa_family_t af, int ttl, int len, uint16_t phdrcksum) { struct vrrpv3_header *vh = mtodo(m, off); struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa; struct carp_softc *sc; NET_EPOCH_ASSERT(); MPASS(vh->vrrp_version == CARP_VERSION_VRRPv3); ifa = carp_find_ifa(m, af, vh->vrrp_vrtid); if (ifa == NULL) { m_freem(m); return; } sc = ifa->ifa_carp; CARP_LOCK(sc); ifa_free(ifa); /* verify the CARP version. */ if (sc->sc_version != CARP_VERSION_VRRPv3) { CARP_UNLOCK(sc); CARPSTATS_INC(carps_badver); CARP_DEBUG("%s: invalid version %d\n", if_name(ifp), vh->vrrp_version); m_freem(m); return; } /* verify that the IP TTL is 255. */ if (ttl != CARP_DFLTTL) { CARPSTATS_INC(carps_badttl); CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, ttl, if_name(m->m_pkthdr.rcvif)); goto out; } if (vrrp_checksum_verify(m, off, len, phdrcksum)) { CARPSTATS_INC(carps_badsum); CARP_DEBUG("%s: incorrect checksum for VRID %u@%s\n", __func__, sc->sc_vhid, if_name(ifp)); goto out; } /* RFC9568, 7.1 Receiving VRRP packets. */ if (sc->sc_vrrp_prio == 255) { CARP_DEBUG("%s: our priority is 255. Ignore peer announcement.\n", __func__); goto out; } /* XXX TODO Check IP address payload. */ sc->sc_vrrp_master_inter = ntohs(vh->vrrp_max_adver_int); switch (sc->sc_state) { case INIT: break; case MASTER: /* * If we receive an advertisement from a master who's going to * be more frequent than us, go into BACKUP state. * Same if the peer has a higher priority than us. */ if (ntohs(vh->vrrp_max_adver_int) < sc->sc_vrrp_adv_inter || vh->vrrp_priority > sc->sc_vrrp_prio) { callout_stop(&sc->sc_ad_tmo); carp_set_state(sc, BACKUP, "more frequent advertisement received"); carp_setrun(sc, 0); carp_delroute(sc); } break; case BACKUP: /* * If we're pre-empting masters who advertise slower than us, * and this one claims to be slower, treat him as down. 
*/ if (V_carp_preempt && (ntohs(vh->vrrp_max_adver_int) > sc->sc_vrrp_adv_inter || vh->vrrp_priority < sc->sc_vrrp_prio)) { carp_master_down_locked(sc, "preempting a slower master"); break; } /* * Otherwise, we reset the counter and wait for the next * advertisement. */ carp_setrun(sc, af); break; } out: CARP_UNLOCK(sc); m_freem(m); } static int carp_tag(struct carp_softc *sc, struct mbuf *m) { struct m_tag *mtag; /* Tag packet for carp_output */ if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(sc->sc_vhid), M_NOWAIT)) == NULL) { m_freem(m); CARPSTATS_INC(carps_onomem); return (ENOMEM); } bcopy(&sc->sc_vhid, mtag + 1, sizeof(sc->sc_vhid)); m_tag_prepend(m, mtag); return (0); } static void carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) { MPASS(sc->sc_version == CARP_VERSION_CARP); if (sc->sc_init_counter) { /* this could also be seconds since unix epoch */ sc->sc_counter = arc4random(); sc->sc_counter = sc->sc_counter << 32; sc->sc_counter += arc4random(); } else sc->sc_counter++; ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); } static inline void send_ad_locked(struct carp_softc *sc) { switch (sc->sc_version) { case CARP_VERSION_CARP: carp_send_ad_locked(sc); break; case CARP_VERSION_VRRPv3: vrrp_send_ad_locked(sc); break; } } /* * To avoid LORs and possible recursions this function shouldn't * be called directly, but scheduled via taskqueue. */ static void carp_send_ad_all(void *ctx __unused, int pending __unused) { struct carp_softc *sc; struct epoch_tracker et; NET_EPOCH_ENTER(et); mtx_lock(&carp_mtx); LIST_FOREACH(sc, &carp_list, sc_next) if (sc->sc_state == MASTER) { CARP_LOCK(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); send_ad_locked(sc); CURVNET_RESTORE(); CARP_UNLOCK(sc); } mtx_unlock(&carp_mtx); NET_EPOCH_EXIT(et); } /* Send a periodic advertisement, executed in callout context. 
*/ static void carp_callout(void *v) { struct carp_softc *sc = v; struct epoch_tracker et; NET_EPOCH_ENTER(et); CARP_LOCK_ASSERT(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); send_ad_locked(sc); CURVNET_RESTORE(); CARP_UNLOCK(sc); NET_EPOCH_EXIT(et); } static void carp_send_ad_error(struct carp_softc *sc, int error) { /* * We track errors and successful sends with this logic: * - Any error resets success counter to 0. * - MAX_ERRORS triggers demotion. * - MIN_SUCCESS successes resets error counter to 0. * - MIN_SUCCESS reverts demotion, if it was triggered before. */ if (error) { if (sc->sc_sendad_errors < INT_MAX) sc->sc_sendad_errors++; if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { static const char fmt[] = "send error %d on %s"; char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, error, if_name(sc->sc_carpdev)); carp_demote_adj(V_carp_senderr_adj, msg); } sc->sc_sendad_success = 0; } else if (sc->sc_sendad_errors > 0) { if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { static const char fmt[] = "send ok on %s"; char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, if_name(sc->sc_carpdev)); carp_demote_adj(-V_carp_senderr_adj, msg); } sc->sc_sendad_errors = 0; } } } /* * Pick the best ifaddr on the given ifp for sending CARP * advertisements. * * "Best" here is defined by ifa_preferred(). This function is much * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). * * (This could be simplified to return the actual address, except that * it has a different format in AF_INET and AF_INET6.) 
*/ static struct ifaddr * carp_best_ifa(int af, struct ifnet *ifp) { struct ifaddr *ifa, *best; NET_EPOCH_ASSERT(); if (af >= AF_MAX) return (NULL); best = NULL; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == af && (best == NULL || ifa_preferred(best, ifa))) best = ifa; } if (best != NULL) ifa_ref(best); return (best); } static void carp_send_ad_locked(struct carp_softc *sc) { struct carp_header ch; struct timeval tv; struct ifaddr *ifa; struct carp_header *ch_ptr; struct mbuf *m; int len, advskew; NET_EPOCH_ASSERT(); CARP_LOCK_ASSERT(sc); MPASS(sc->sc_version == CARP_VERSION_CARP); advskew = DEMOTE_ADVSKEW(sc); tv.tv_sec = sc->sc_advbase; tv.tv_usec = advskew * 1000000 / 256; ch.carp_version = CARP_VERSION_CARP; ch.carp_type = CARP_ADVERTISEMENT; ch.carp_vhid = sc->sc_vhid; ch.carp_advbase = sc->sc_advbase; ch.carp_advskew = advskew; ch.carp_authlen = 7; /* XXX DEFINE */ ch.carp_pad1 = 0; /* must be zero */ ch.carp_cksum = 0; /* XXXGL: OpenBSD picks first ifaddr with needed family. 
*/ #ifdef INET if (sc->sc_naddrs) { struct ip *ip; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); if (IN_MULTICAST(sc->sc_carpaddr.s_addr)) m->m_flags |= M_MCAST; ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; ip->ip_len = htons(len); ip->ip_off = htons(IP_DF); ip->ip_ttl = CARP_DFLTTL; ip->ip_p = IPPROTO_CARP; ip->ip_sum = 0; ip_fillid(ip); ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); if (ifa != NULL) { ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; ifa_free(ifa); } else ip->ip_src.s_addr = 0; ip->ip_dst = sc->sc_carpaddr; ch_ptr = (struct carp_header *)(&ip[1]); bcopy(&ch, ch_ptr, sizeof(ch)); carp_prepare_ad(m, sc, ch_ptr); if (IN_MULTICAST(ntohl(sc->sc_carpaddr.s_addr)) && carp_tag(sc, m) != 0) goto resched; m->m_data += sizeof(*ip); ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); m->m_data -= sizeof(*ip); CARPSTATS_INC(carps_opackets); carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_carpdev->if_carp->cif_imo, NULL)); } #endif /* INET */ #ifdef INET6 if (sc->sc_naddrs6) { struct ip6_hdr *ip6; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip6) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); ip6 = mtod(m, struct ip6_hdr *); bzero(ip6, sizeof(*ip6)); ip6->ip6_vfc |= IPV6_VERSION; /* Traffic class isn't defined in ip6 struct instead * it gets offset into flowid field */ ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + IPTOS_DSCP_OFFSET)); ip6->ip6_hlim = CARP_DFLTTL; ip6->ip6_nxt = IPPROTO_CARP; /* set the source address */ ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); if (ifa != NULL) { bcopy(IFA_IN6(ifa), &ip6->ip6_src, sizeof(struct in6_addr)); 
ifa_free(ifa); } else /* This should never happen with IPv6. */ bzero(&ip6->ip6_src, sizeof(struct in6_addr)); /* Set the multicast destination. */ memcpy(&ip6->ip6_dst, &sc->sc_carpaddr6, sizeof(ip6->ip6_dst)); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) { if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { m_freem(m); CARP_DEBUG("%s: in6_setscope failed\n", __func__); goto resched; } } ch_ptr = (struct carp_header *)(&ip6[1]); bcopy(&ch, ch_ptr, sizeof(ch)); carp_prepare_ad(m, sc, ch_ptr); if (IN6_IS_ADDR_MULTICAST(&sc->sc_carpaddr6) && carp_tag(sc, m) != 0) goto resched; m->m_data += sizeof(*ip6); ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); m->m_data -= sizeof(*ip6); CARPSTATS_INC(carps_opackets6); carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); } #endif /* INET6 */ resched: callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_callout, sc); } static void vrrp_send_ad_locked(struct carp_softc *sc) { struct vrrpv3_header *vh_ptr; struct ifaddr *ifa; struct mbuf *m; int len; struct vrrpv3_header vh = { .vrrp_version = CARP_VERSION_VRRPv3, .vrrp_type = VRRP_TYPE_ADVERTISEMENT, .vrrp_vrtid = sc->sc_vhid, .vrrp_priority = sc->sc_vrrp_prio, .vrrp_count_addr = 0, .vrrp_max_adver_int = htons(sc->sc_vrrp_adv_inter), .vrrp_checksum = 0, }; NET_EPOCH_ASSERT(); CARP_LOCK_ASSERT(sc); MPASS(sc->sc_version == CARP_VERSION_VRRPv3); #ifdef INET if (sc->sc_naddrs) { struct ip *ip; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip) + sizeof(vh); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); m->m_flags |= M_MCAST; ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = V_carp_dscp << IPTOS_DSCP_OFFSET; ip->ip_off = htons(IP_DF); ip->ip_ttl = CARP_DFLTTL; ip->ip_p = IPPROTO_CARP; ip->ip_sum = 0; ip_fillid(ip); ifa = carp_best_ifa(AF_INET, 
sc->sc_carpdev); if (ifa != NULL) { ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; ifa_free(ifa); } else ip->ip_src.s_addr = 0; ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); /* Include the IP addresses in the announcement. */ for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { struct sockaddr_in *in; MPASS(sc->sc_ifas[i] != NULL); if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET) continue; in = (struct sockaddr_in *)sc->sc_ifas[i]->ifa_addr; if (m_append(m, sizeof(in->sin_addr), (caddr_t)&in->sin_addr) != 1) { m_freem(m); goto resched; } vh.vrrp_count_addr++; len += sizeof(in->sin_addr); } ip->ip_len = htons(len); vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip)); bcopy(&vh, vh_ptr, sizeof(vh)); vh_ptr->vrrp_checksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl((uint16_t)(len - sizeof(*ip)) + ip->ip_p)); vh_ptr->vrrp_checksum = in_cksum_skip(m, len, sizeof(*ip)); if (carp_tag(sc, m)) goto resched; CARPSTATS_INC(carps_opackets); carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_carpdev->if_carp->cif_imo, NULL)); } #endif #ifdef INET6 if (sc->sc_naddrs6) { struct ip6_hdr *ip6; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip6) + sizeof(vh); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); m->m_flags |= M_MCAST; ip6 = mtod(m, struct ip6_hdr *); bzero(ip6, sizeof(*ip6)); ip6->ip6_vfc |= IPV6_VERSION; /* Traffic class isn't defined in ip6 struct instead * it gets offset into flowid field */ ip6->ip6_flow |= htonl(V_carp_dscp << (IPV6_FLOWLABEL_LEN + IPTOS_DSCP_OFFSET)); ip6->ip6_hlim = CARP_DFLTTL; ip6->ip6_nxt = IPPROTO_CARP; /* set the source address */ ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); if (ifa != NULL) { bcopy(IFA_IN6(ifa), &ip6->ip6_src, sizeof(struct in6_addr)); ifa_free(ifa); } else /* This should never happen with IPv6. 
*/ bzero(&ip6->ip6_src, sizeof(struct in6_addr)); /* Set the multicast destination. */ bzero(&ip6->ip6_dst, sizeof(ip6->ip6_dst)); ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; ip6->ip6_dst.s6_addr8[15] = 0x12; /* Include the IP addresses in the announcement. */ len = sizeof(vh); for (int i = 0; i < (sc->sc_naddrs + sc->sc_naddrs6); i++) { struct sockaddr_in6 *in6; MPASS(sc->sc_ifas[i] != NULL); if (sc->sc_ifas[i]->ifa_addr->sa_family != AF_INET6) continue; in6 = (struct sockaddr_in6 *)sc->sc_ifas[i]->ifa_addr; if (m_append(m, sizeof(in6->sin6_addr), (char *)&in6->sin6_addr) != 1) { m_freem(m); goto resched; } vh.vrrp_count_addr++; len += sizeof(in6->sin6_addr); } ip6->ip6_plen = htonl(len); vh_ptr = (struct vrrpv3_header *)mtodo(m, sizeof(*ip6)); bcopy(&vh, vh_ptr, sizeof(vh)); vh_ptr->vrrp_checksum = in6_cksum_pseudo(ip6, len, ip6->ip6_nxt, 0); vh_ptr->vrrp_checksum = in_cksum_skip(m, len + sizeof(*ip6), sizeof(*ip6)); if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { m_freem(m); CARP_DEBUG("%s: in6_setscope failed\n", __func__); goto resched; } if (carp_tag(sc, m)) goto resched; CARPSTATS_INC(carps_opackets6); carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); } #endif resched: callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100, carp_callout, sc); } static void carp_addroute(struct carp_softc *sc) { struct ifaddr *ifa; CARP_FOREACH_IFA(sc, ifa) carp_ifa_addroute(ifa); } static void carp_ifa_addroute(struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: in_addprefix(ifatoia(ifa)); ifa_add_loopback_route(ifa, (struct sockaddr *)&ifatoia(ifa)->ia_addr); break; #endif #ifdef INET6 case AF_INET6: ifa_add_loopback_route(ifa, (struct sockaddr *)&ifatoia6(ifa)->ia_addr); nd6_add_ifa_lle(ifatoia6(ifa)); break; #endif } } static void carp_delroute(struct carp_softc *sc) { struct ifaddr *ifa; CARP_FOREACH_IFA(sc, ifa) carp_ifa_delroute(ifa); } static void 
carp_ifa_delroute(struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifa_del_loopback_route(ifa, (struct sockaddr *)&ifatoia(ifa)->ia_addr); in_scrubprefix(ifatoia(ifa), LLE_STATIC); break; #endif #ifdef INET6 case AF_INET6: ifa_del_loopback_route(ifa, (struct sockaddr *)&ifatoia6(ifa)->ia_addr); nd6_rem_ifa_lle(ifatoia6(ifa), 1); break; #endif } } int carp_master(struct ifaddr *ifa) { struct carp_softc *sc = ifa->ifa_carp; return (sc->sc_state == MASTER); } #ifdef INET /* * Broadcast a gratuitous ARP request containing * the virtual router MAC address for each IP address * associated with the virtual router. */ static void carp_send_arp(struct carp_softc *sc) { struct ifaddr *ifa; struct in_addr addr; NET_EPOCH_ASSERT(); CARP_FOREACH_IFA(sc, ifa) { if (ifa->ifa_addr->sa_family != AF_INET) continue; addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; arp_announce_ifaddr(sc->sc_carpdev, addr, sc->sc_addr); } } int carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) { struct carp_softc *sc = ifa->ifa_carp; if (sc->sc_state == MASTER) { *enaddr = sc->sc_addr; return (1); } return (0); } #endif #ifdef INET6 static void carp_send_na(struct carp_softc *sc) { static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; struct ifaddr *ifa; struct in6_addr *in6; CARP_FOREACH_IFA(sc, ifa) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6 = IFA_IN6(ifa); nd6_na_output(sc->sc_carpdev, &mcast, in6, ND_NA_FLAG_OVERRIDE, 1, NULL); DELAY(1000); /* XXX */ } } /* * Returns ifa in case it's a carp address and it is MASTER, or if the address * matches and is not a carp address. Returns NULL otherwise. 
*/ struct ifaddr * carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) { struct ifaddr *ifa; NET_EPOCH_ASSERT(); ifa = NULL; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) continue; if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) ifa = NULL; else ifa_ref(ifa); break; } return (ifa); } char * carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) { struct ifaddr *ifa; NET_EPOCH_ASSERT(); IFNET_FOREACH_IFA(ifp, ifa) if (ifa->ifa_addr->sa_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { struct carp_softc *sc = ifa->ifa_carp; struct m_tag *mtag; mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), M_NOWAIT); if (mtag == NULL) /* Better a bit than nothing. */ return (sc->sc_addr); bcopy(&sc, mtag + 1, sizeof(sc)); m_tag_prepend(m, mtag); return (sc->sc_addr); } return (NULL); } #endif /* INET6 */ int carp_forus(struct ifnet *ifp, u_char *dhost) { struct carp_softc *sc; uint8_t *ena = dhost; if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) return (0); CIF_LOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { /* * CARP_LOCK() is not here, since would protect nothing, but * cause deadlock with if_bridge, calling this under its lock. */ if (sc->sc_state == MASTER && !bcmp(dhost, sc->sc_addr, ETHER_ADDR_LEN)) { CIF_UNLOCK(ifp->if_carp); return (1); } } CIF_UNLOCK(ifp->if_carp); return (0); } /* Master down timeout event, executed in callout context. 
*/ static void carp_master_down(void *v) { struct carp_softc *sc = v; struct epoch_tracker et; NET_EPOCH_ENTER(et); CARP_LOCK_ASSERT(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); if (sc->sc_state == BACKUP) { carp_master_down_locked(sc, "master timed out"); } CURVNET_RESTORE(); CARP_UNLOCK(sc); NET_EPOCH_EXIT(et); } static void carp_master_down_locked(struct carp_softc *sc, const char *reason) { NET_EPOCH_ASSERT(); CARP_LOCK_ASSERT(sc); switch (sc->sc_state) { case BACKUP: carp_set_state(sc, MASTER, reason); send_ad_locked(sc); #ifdef INET carp_send_arp(sc); #endif #ifdef INET6 carp_send_na(sc); #endif carp_setrun(sc, 0); carp_addroute(sc); break; case INIT: case MASTER: #ifdef INVARIANTS panic("carp: VHID %u@%s: master_down event in %s state\n", sc->sc_vhid, if_name(sc->sc_carpdev), sc->sc_state ? "MASTER" : "INIT"); #endif break; } } /* * When in backup state, af indicates whether to reset the master down timer * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
*/ static void carp_setrun(struct carp_softc *sc, sa_family_t af) { struct timeval tv; int timeout; CARP_LOCK_ASSERT(sc); if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || sc->sc_carpdev->if_link_state != LINK_STATE_UP || (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) || !V_carp_allow) return; switch (sc->sc_state) { case INIT: carp_set_state(sc, BACKUP, "initialization complete"); carp_setrun(sc, 0); break; case BACKUP: callout_stop(&sc->sc_ad_tmo); switch (sc->sc_version) { case CARP_VERSION_CARP: tv.tv_sec = 3 * sc->sc_advbase; tv.tv_usec = sc->sc_advskew * 1000000 / 256; timeout = tvtohz(&tv); break; case CARP_VERSION_VRRPv3: /* skew time */ timeout = (256 - sc->sc_vrrp_prio) * sc->sc_vrrp_master_inter / 256; timeout += (3 * sc->sc_vrrp_master_inter); timeout *= hz; timeout /= 100; /* master interval is in centiseconds */ break; } switch (af) { #ifdef INET case AF_INET: callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); break; #endif #ifdef INET6 case AF_INET6: callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); break; #endif default: #ifdef INET if (sc->sc_naddrs) callout_reset(&sc->sc_md_tmo, timeout, carp_master_down, sc); #endif #ifdef INET6 if (sc->sc_naddrs6) callout_reset(&sc->sc_md6_tmo, timeout, carp_master_down, sc); #endif break; } break; case MASTER: switch (sc->sc_version) { case CARP_VERSION_CARP: tv.tv_sec = sc->sc_advbase; tv.tv_usec = sc->sc_advskew * 1000000 / 256; callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_callout, sc); break; case CARP_VERSION_VRRPv3: callout_reset(&sc->sc_ad_tmo, sc->sc_vrrp_adv_inter * hz / 100, carp_callout, sc); break; } break; } } /* * Setup multicast structures. 
*/ static int carp_multicast_setup(struct carp_if *cif, sa_family_t sa) { struct ifnet *ifp = cif->cif_ifp; int error = 0; switch (sa) { #ifdef INET case AF_INET: { struct ip_moptions *imo = &cif->cif_imo; struct in_mfilter *imf; struct in_addr addr; if (ip_mfilter_first(&imo->imo_head) != NULL) return (0); imf = ip_mfilter_alloc(M_WAITOK, 0, 0); ip_mfilter_init(&imo->imo_head); imo->imo_multicast_vif = -1; addr.s_addr = htonl(INADDR_CARP_GROUP); if ((error = in_joingroup(ifp, &addr, NULL, &imf->imf_inm)) != 0) { ip_mfilter_free(imf); break; } ip_mfilter_insert(&imo->imo_head, imf); imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = CARP_DFLTTL; imo->imo_multicast_loop = 0; break; } #endif #ifdef INET6 case AF_INET6: { struct ip6_moptions *im6o = &cif->cif_im6o; struct in6_mfilter *im6f[2]; struct in6_addr in6; if (ip6_mfilter_first(&im6o->im6o_head)) return (0); im6f[0] = ip6_mfilter_alloc(M_WAITOK, 0, 0); im6f[1] = ip6_mfilter_alloc(M_WAITOK, 0, 0); ip6_mfilter_init(&im6o->im6o_head); im6o->im6o_multicast_hlim = CARP_DFLTTL; im6o->im6o_multicast_ifp = ifp; /* Join IPv6 CARP multicast group. */ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr8[15] = 0x12; if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[0]->im6f_in6m, 0)) != 0) { ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } /* Join solicited multicast address. 
*/ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr32[1] = 0; in6.s6_addr32[2] = htonl(1); in6.s6_addr32[3] = 0; in6.s6_addr8[12] = 0xff; if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } if ((error = in6_joingroup(ifp, &in6, NULL, &im6f[1]->im6f_in6m, 0)) != 0) { in6_leavegroup(im6f[0]->im6f_in6m, NULL); ip6_mfilter_free(im6f[0]); ip6_mfilter_free(im6f[1]); break; } ip6_mfilter_insert(&im6o->im6o_head, im6f[0]); ip6_mfilter_insert(&im6o->im6o_head, im6f[1]); break; } #endif } return (error); } /* * Free multicast structures. */ static void carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) { #ifdef INET struct ip_moptions *imo = &cif->cif_imo; struct in_mfilter *imf; #endif #ifdef INET6 struct ip6_moptions *im6o = &cif->cif_im6o; struct in6_mfilter *im6f; #endif sx_assert(&carp_sx, SA_XLOCKED); switch (sa) { #ifdef INET case AF_INET: if (cif->cif_naddrs != 0) break; while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { ip_mfilter_remove(&imo->imo_head, imf); in_leavegroup(imf->imf_inm, NULL); ip_mfilter_free(imf); } break; #endif #ifdef INET6 case AF_INET6: if (cif->cif_naddrs6 != 0) break; while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { ip6_mfilter_remove(&im6o->im6o_head, im6f); in6_leavegroup(im6f->im6f_in6m, NULL); ip6_mfilter_free(im6f); } break; #endif } } int carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) { struct m_tag *mtag; int vhid; if (!sa) return (0); switch (sa->sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: return (0); } mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); if (mtag == NULL) return (0); bcopy(mtag + 1, &vhid, sizeof(vhid)); /* Set the source MAC address to the Virtual Router MAC Address. 
*/
	switch (ifp->if_type) {
	case IFT_ETHER:
	case IFT_BRIDGE:
	case IFT_L2VLAN: {
			struct ether_header *eh;

			/* Source MAC becomes 00:00:5e:00:01:<vhid>. */
			eh = mtod(m, struct ether_header *);
			eh->ether_shost[0] = 0;
			eh->ether_shost[1] = 0;
			eh->ether_shost[2] = 0x5e;
			eh->ether_shost[3] = 0;
			eh->ether_shost[4] = 1;
			eh->ether_shost[5] = vhid;
		}
		break;
	default:
		printf("%s: carp is not supported for the %d interface type\n",
		    if_name(ifp), ifp->if_type);
		return (EOPNOTSUPP);
	}

	return (0);
}

/*
 * carp_alloc: create and register a new softc for 'vhid' on 'ifp'.
 *
 * Called with carp_sx exclusively held.  Allocates the per-interface
 * carp_if first if the interface has none, fills in version-specific
 * defaults, initialises the softc lock and callouts, and links the softc
 * into both the interface's list and the global carp_list.  All
 * allocations use M_WAITOK; the new softc is returned.
 */
static struct carp_softc*
carp_alloc(struct ifnet *ifp, carp_version_t version, int vhid)
{
	struct carp_softc *sc;
	struct carp_if *cif;

	sx_assert(&carp_sx, SA_XLOCKED);

	if ((cif = ifp->if_carp) == NULL)
		cif = carp_alloc_if(ifp);

	sc = malloc(sizeof(*sc), M_CARP, M_WAITOK);
	/* The compound literal zeroes every field not named here. */
	*sc = (struct carp_softc ){
		.sc_vhid = vhid,
		.sc_version = version,
		.sc_state = INIT,
		.sc_carpdev = ifp,
		/* Start with room for a single address pointer. */
		.sc_ifasiz = sizeof(struct ifaddr *),
		.sc_addr = { 0, 0, 0x5e, 0, 1, vhid },
	};
	sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO);

	/* Version-specific defaults. */
	switch (version) {
	case CARP_VERSION_CARP:
		sc->sc_advbase = CARP_DFLTINTV;
		sc->sc_init_counter = true;
		sc->sc_carpaddr.s_addr = htonl(INADDR_CARP_GROUP);
		sc->sc_carpaddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
		sc->sc_carpaddr6.s6_addr8[15] = 0x12;
		break;
	case CARP_VERSION_VRRPv3:
		sc->sc_vrrp_adv_inter = 100;
		sc->sc_vrrp_master_inter = sc->sc_vrrp_adv_inter;
		sc->sc_vrrp_prio = 100;
		break;
	}

	CARP_LOCK_INIT(sc);
	/* All callouts are tied to the softc mutex. */
#ifdef INET
	callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
#endif
#ifdef INET6
	callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
#endif
	callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);

	CIF_LOCK(cif);
	TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list);
	CIF_UNLOCK(cif);

	mtx_lock(&carp_mtx);
	LIST_INSERT_HEAD(&carp_list, sc, sc_next);
	mtx_unlock(&carp_mtx);

	return (sc);
}

/*
 * carp_grow_ifas: double the capacity of sc->sc_ifas.  The new array is
 * allocated (zeroed, M_WAITOK) before the softc lock is taken; the copy
 * and pointer swap happen under the lock.
 */
static void
carp_grow_ifas(struct carp_softc *sc)
{
	struct ifaddr **new;

	new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO);
	CARP_LOCK(sc);
	bcopy(sc->sc_ifas, new, sc->sc_ifasiz);
free(sc->sc_ifas, M_CARP);
	sc->sc_ifas = new;
	sc->sc_ifasiz *= 2;
	CARP_UNLOCK(sc);
}

/*
 * carp_destroy: unlink and free a softc.
 *
 * Called with carp_sx exclusively held.  The function releases the softc
 * lock itself (CARP_UNLOCK below), so the caller is expected to enter
 * with it held.  Callouts are drained before the lock is destroyed and
 * the memory freed.
 */
static void
carp_destroy(struct carp_softc *sc)
{
	struct ifnet *ifp = sc->sc_carpdev;
	struct carp_if *cif = ifp->if_carp;

	sx_assert(&carp_sx, SA_XLOCKED);

	/* Undo the demotion this vhid contributed while suppressed. */
	if (sc->sc_suppress)
		carp_demote_adj(-V_carp_ifdown_adj, "vhid removed");
	CARP_UNLOCK(sc);

	CIF_LOCK(cif);
	TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list);
	CIF_UNLOCK(cif);

	mtx_lock(&carp_mtx);
	LIST_REMOVE(sc, sc_next);
	mtx_unlock(&carp_mtx);

	callout_drain(&sc->sc_ad_tmo);
#ifdef INET
	callout_drain(&sc->sc_md_tmo);
#endif
#ifdef INET6
	callout_drain(&sc->sc_md6_tmo);
#endif
	CARP_LOCK_DESTROY(sc);

	free(sc->sc_ifas, M_CARP);
	free(sc, M_CARP);
}

/*
 * carp_alloc_if: allocate the per-interface CARP state and attach it to
 * 'ifp'.  Promiscuous mode is requested; failure is only logged, and
 * CIF_PROMISC records success so it can be undone later.  A reference on
 * the interface is taken while publishing if_carp.
 */
static struct carp_if*
carp_alloc_if(struct ifnet *ifp)
{
	struct carp_if *cif;
	int error;

	cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO);

	if ((error = ifpromisc(ifp, 1)) != 0)
		printf("%s: ifpromisc(%s) failed: %d\n",
		    __func__, if_name(ifp), error);
	else
		cif->cif_flags |= CIF_PROMISC;

	CIF_LOCK_INIT(cif);
	cif->cif_ifp = ifp;
	TAILQ_INIT(&cif->cif_vrs);

	IF_ADDR_WLOCK(ifp);
	ifp->if_carp = cif;
	if_ref(ifp);
	IF_ADDR_WUNLOCK(ifp);

	return (cif);
}

/*
 * carp_free_if: detach and free per-interface CARP state.  The softc
 * list must be empty.  Promiscuous mode is switched off again only if
 * this cif turned it on, and the interface reference is dropped.
 */
static void
carp_free_if(struct carp_if *cif)
{
	struct ifnet *ifp = cif->cif_ifp;

	CIF_LOCK_ASSERT(cif);
	KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty",
	    __func__));

	IF_ADDR_WLOCK(ifp);
	ifp->if_carp = NULL;
	IF_ADDR_WUNLOCK(ifp);

	CIF_LOCK_DESTROY(cif);

	if (cif->cif_flags & CIF_PROMISC)
		ifpromisc(ifp, 0);
	if_rele(ifp);

	free(cif, M_CARP);
}

/*
 * carp_carprcp: output callback that copies a softc's settings into the
 * struct carpreq passed as 'arg'.  The key is copied only when 'priv' is
 * non-zero and is zeroed otherwise.  For VRRPv3 only state and vhid are
 * filled in.  Always returns true.
 */
static bool
carp_carprcp(void *arg, struct carp_softc *sc, int priv)
{
	struct carpreq *carpr = arg;

	CARP_LOCK(sc);
	carpr->carpr_state = sc->sc_state;
	carpr->carpr_vhid = sc->sc_vhid;
	switch (sc->sc_version) {
	case CARP_VERSION_CARP:
		carpr->carpr_advbase = sc->sc_advbase;
		carpr->carpr_advskew = sc->sc_advskew;
		if (priv)
			bcopy(sc->sc_key, carpr->carpr_key,
			    sizeof(carpr->carpr_key));
		else
			bzero(carpr->carpr_key, sizeof(carpr->carpr_key));
		break;
	case CARP_VERSION_VRRPv3:
		break;
	}
	CARP_UNLOCK(sc);

	return (true);
}
/*
 * carp_ioctl_set: create or reconfigure the vhid described by 'carpr' on
 * 'ifp'.
 *
 * Returns EINVAL for an out-of-range vhid, out-of-range CARP advbase or
 * advskew, an unknown version, or a version that differs from the one of
 * an existing vhid; 0 otherwise.  A softc is allocated when the vhid
 * does not exist yet.  A state change is only acted upon when the softc
 * has left INIT and the requested state differs from the current one.
 */
static int
carp_ioctl_set(if_t ifp, struct carpkreq *carpr)
{
	struct epoch_tracker et;
	struct carp_softc *sc = NULL;
	int error = 0;

	if (carpr->carpr_vhid <= 0 || carpr->carpr_vhid > CARP_MAXVHID)
		return (EINVAL);

	switch (carpr->carpr_version) {
	case CARP_VERSION_CARP:
		/* advbase 0 means "leave unchanged" (see below). */
		if (carpr->carpr_advbase != 0 && (carpr->carpr_advbase > 255 ||
		    carpr->carpr_advbase < CARP_DFLTINTV))
			return (EINVAL);
		if (carpr->carpr_advskew < 0 || carpr->carpr_advskew >= 255)
			return (EINVAL);
		break;
	case CARP_VERSION_VRRPv3:
		/* XXXGL: shouldn't we check anything? */
		break;
	default:
		return (EINVAL);
	}

	/* Look for an existing softc with this vhid. */
	if (ifp->if_carp) {
		IFNET_FOREACH_CARP(ifp, sc)
			if (sc->sc_vhid == carpr->carpr_vhid)
				break;
	}

	if (sc == NULL)
		sc = carp_alloc(ifp, carpr->carpr_version, carpr->carpr_vhid);
	else if (sc->sc_version != carpr->carpr_version)
		return (EINVAL);

	CARP_LOCK(sc);
	switch (sc->sc_version) {
	case CARP_VERSION_CARP:
		if (carpr->carpr_advbase != 0)
			sc->sc_advbase = carpr->carpr_advbase;
		sc->sc_advskew = carpr->carpr_advskew;
		/* Unspecified peer addresses keep the current setting. */
		if (carpr->carpr_addr.s_addr != INADDR_ANY)
			sc->sc_carpaddr = carpr->carpr_addr;
		if (!IN6_IS_ADDR_UNSPECIFIED(&carpr->carpr_addr6)) {
			memcpy(&sc->sc_carpaddr6, &carpr->carpr_addr6,
			    sizeof(sc->sc_carpaddr6));
		}
		/* A key starting with NUL is treated as "no new key". */
		if (carpr->carpr_key[0] != '\0') {
			bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
			carp_hmac_prepare(sc);
		}
		break;
	case CARP_VERSION_VRRPv3:
		/* Zero priority / interval keep the current setting. */
		if (carpr->carpr_vrrp_priority != 0)
			sc->sc_vrrp_prio = carpr->carpr_vrrp_priority;
		if (carpr->carpr_vrrp_adv_inter)
			sc->sc_vrrp_adv_inter = carpr->carpr_vrrp_adv_inter;
		break;
	}

	if (sc->sc_state != INIT &&
	    carpr->carpr_state != sc->sc_state) {
		switch (carpr->carpr_state) {
		case BACKUP:
			callout_stop(&sc->sc_ad_tmo);
			carp_set_state(sc, BACKUP,
			    "user requested via ifconfig");
			carp_setrun(sc, 0);
			carp_delroute(sc);
			break;
		case MASTER:
			NET_EPOCH_ENTER(et);
			carp_master_down_locked(sc,
			    "user requested via ifconfig");
			NET_EPOCH_EXIT(et);
			break;
		default:
			break;
		}
	}
	CARP_UNLOCK(sc);

	return (error);
}

static int
carp_ioctl_get(if_t ifp, struct ucred
*cred, struct carpreq *carpr,
    bool (*outfn)(void *, struct carp_softc *, int), void *arg)
{
	/*
	 * Report one vhid (carpr_vhid != 0) or every vhid on the interface
	 * (carpr_vhid == 0) by calling outfn(arg, sc, privileged) per softc.
	 *
	 * Returns EINVAL for an out-of-range vhid, EMSGSIZE when carpr_count
	 * is below 1 or smaller than the number of vhids, ENOENT when the
	 * interface has no CARP state or the vhid is not found, and ENOMEM
	 * when outfn returns false.  On success carpr_count holds the number
	 * of entries reported.
	 */
	int priveleged;
	struct carp_softc *sc;

	if (carpr->carpr_vhid < 0 || carpr->carpr_vhid > CARP_MAXVHID)
		return (EINVAL);
	if (carpr->carpr_count < 1)
		return (EMSGSIZE);
	if (ifp->if_carp == NULL)
		return (ENOENT);

	/* Passed on to outfn; carp_carprcp uses it to gate the key. */
	priveleged = (priv_check_cred(cred, PRIV_NETINET_CARP) == 0);
	if (carpr->carpr_vhid != 0) {
		IFNET_FOREACH_CARP(ifp, sc)
			if (sc->sc_vhid == carpr->carpr_vhid)
				break;
		if (sc == NULL)
			return (ENOENT);

		if (! outfn(arg, sc, priveleged))
			return (ENOMEM);
		carpr->carpr_count = 1;
	} else  {
		int count;

		/* First pass: make sure the caller has room for all. */
		count = 0;
		IFNET_FOREACH_CARP(ifp, sc)
			count++;

		if (count > carpr->carpr_count)
			return (EMSGSIZE);

		IFNET_FOREACH_CARP(ifp, sc) {
			if (! outfn(arg, sc, priveleged))
				return (ENOMEM);
			/*
			 * Set after outfn on every iteration; it is not
			 * updated at all if the list is empty.
			 */
			carpr->carpr_count = count;
		}
	}

	return (0);
}

/*
 * carp_ioctl: SIOCSVH / SIOCGVH handler.  Copies a struct carpreq in
 * from userland, looks the interface up by name (taking a reference that
 * is dropped at 'out') and dispatches under carp_sx.  SIOCSVH requires
 * PRIV_NETINET_CARP.  Returns 0 or an errno value.
 */
int
carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
{
	struct carpreq carpr;
	struct carpkreq carprk = {
		.carpr_version = CARP_VERSION_CARP,
	};
	struct ifnet *ifp;
	int error = 0;

	if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr)))
		return (error);

	ifp = ifunit_ref(ifr->ifr_name);
	if ((error = carp_is_supported_if(ifp)) != 0)
		goto out;

	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
		error = EADDRNOTAVAIL;
		goto out;
	}

	sx_xlock(&carp_sx);
	switch (cmd) {
	case SIOCSVH:
		if ((error = priv_check(td, PRIV_NETINET_CARP)))
			break;

		/*
		 * Overlay the user request on the kernel request; the
		 * version preset above survives only if it lies beyond
		 * sizeof(carpr) -- presumably carpkreq extends carpreq,
		 * TODO confirm against the struct definitions.
		 */
		memcpy(&carprk, &carpr, sizeof(carpr));
		error = carp_ioctl_set(ifp, &carprk);
		break;

	case SIOCGVH:
		error = carp_ioctl_get(ifp, td->td_ucred, &carpr,
		    carp_carprcp, &carpr);
		if (error == 0) {
			/*
			 * NOTE(review): 'carpr' is a single struct carpreq on
			 * this stack frame and carp_carprcp overwrites that
			 * one struct for every vhid, yet carpr_count *
			 * sizeof(carpr) bytes are copied out.  With more
			 * than one vhid this reads past 'carpr'; it needs a
			 * per-entry copyout to be correct.
			 */
			error = copyout(&carpr,
			    (char *)ifr_data_get_ptr(ifr),
			    carpr.carpr_count * sizeof(carpr));
		}
		break;
	default:
		error = EINVAL;
	}
	sx_xunlock(&carp_sx);

out:
	if (ifp != NULL)
		if_rele(ifp);

	return (error);
}

/*
 * carp_get_vhid: return the vhid an address is attached to, or 0 when
 * 'ifa' is NULL or not attached to any vhid.
 */
static int
carp_get_vhid(struct ifaddr *ifa)
{

	if (ifa == NULL || ifa->ifa_carp == NULL)
		return (0);

	return (ifa->ifa_carp->sc_vhid);
}

/*
 * carp_attach: attach address 'ifa' to the existing vhid 'vhid' on its
 * interface.  Returns EPROTOTYPE for an unsupported address family,
 * ENOPROTOOPT when the interface has no CARP state, ENOENT when the vhid
 * does not exist, or the error of carp_multicast_setup(); 0 on success.
 */
int
carp_attach(struct ifaddr *ifa, int vhid)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct carp_if *cif =
ifp->if_carp;
	struct carp_softc *sc;
	int index, error;

	KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa));

	/* Accept only the address families compiled in. */
	switch (ifa->ifa_addr->sa_family) {
#ifdef INET
	case AF_INET:
#endif
#ifdef INET6
	case AF_INET6:
#endif
		break;
	default:
		return (EPROTOTYPE);
	}

	sx_xlock(&carp_sx);
	if (ifp->if_carp == NULL) {
		sx_xunlock(&carp_sx);
		return (ENOPROTOOPT);
	}

	IFNET_FOREACH_CARP(ifp, sc)
		if (sc->sc_vhid == vhid)
			break;
	if (sc == NULL) {
		sx_xunlock(&carp_sx);
		return (ENOENT);
	}

	error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family);
	if (error) {
		CIF_FREE(cif);
		sx_xunlock(&carp_sx);
		return (error);
	}

	/* 1-based slot for the new address; grow the array if full. */
	index = sc->sc_naddrs + sc->sc_naddrs6 + 1;
	if (index > sc->sc_ifasiz / sizeof(struct ifaddr *))
		carp_grow_ifas(sc);

	switch (ifa->ifa_addr->sa_family) {
#ifdef INET
	case AF_INET:
		cif->cif_naddrs++;
		sc->sc_naddrs++;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		cif->cif_naddrs6++;
		sc->sc_naddrs6++;
		break;
#endif
	}

	/* The softc keeps its own reference on the address. */
	ifa_ref(ifa);

	CARP_LOCK(sc);
	sc->sc_ifas[index - 1] = ifa;
	ifa->ifa_carp = sc;
	/* Only the CARP protocol version uses the HMAC. */
	if (sc->sc_version == CARP_VERSION_CARP)
		carp_hmac_prepare(sc);
	carp_sc_state(sc);
	CARP_UNLOCK(sc);

	sx_xunlock(&carp_sx);

	return (0);
}

/*
 * carp_detach: detach address 'ifa' from its vhid.  The softc is
 * destroyed when its last address goes away and the CIF is released,
 * unless 'keep_cif' is set, in which case both are kept.
 */
void
carp_detach(struct ifaddr *ifa, bool keep_cif)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct carp_if *cif = ifp->if_carp;
	struct carp_softc *sc = ifa->ifa_carp;
	int i, index;

	KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa));

	sx_xlock(&carp_sx);

	CARP_LOCK(sc);
	/* Shift array.
*/
	index = sc->sc_naddrs + sc->sc_naddrs6;
	for (i = 0; i < index; i++)
		if (sc->sc_ifas[i] == ifa)
			break;
	KASSERT(i < index, ("%s: %p no backref", __func__, ifa));
	/* Close the gap left by the removed entry. */
	for (; i < index - 1; i++)
		sc->sc_ifas[i] = sc->sc_ifas[i+1];
	sc->sc_ifas[index - 1] = NULL;

	switch (ifa->ifa_addr->sa_family) {
#ifdef INET
	case AF_INET:
		cif->cif_naddrs--;
		sc->sc_naddrs--;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		cif->cif_naddrs6--;
		sc->sc_naddrs6--;
		break;
#endif
	}

	carp_ifa_delroute(ifa);
	/* Leaves the groups only once the family's count reached zero. */
	carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family);

	ifa->ifa_carp = NULL;
	ifa_free(ifa);

	if (sc->sc_version == CARP_VERSION_CARP)
		carp_hmac_prepare(sc);
	carp_sc_state(sc);

	/* carp_destroy() releases the softc lock itself. */
	if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)
		carp_destroy(sc);
	else
		CARP_UNLOCK(sc);

	if (!keep_cif)
		CIF_FREE(cif);

	sx_xunlock(&carp_sx);
}

/*
 * carp_set_state: move the softc to 'state'.  Does nothing if the state
 * is unchanged; otherwise logs the transition with 'reason' and sends a
 * devctl notification whose subsystem is "<vhid>@<ifname>".  Caller
 * holds the softc lock.
 */
static void
carp_set_state(struct carp_softc *sc, int state, const char *reason)
{

	CARP_LOCK_ASSERT(sc);

	if (sc->sc_state != state) {
		const char *carp_states[] = { CARP_STATES };
		char subsys[IFNAMSIZ+5];

		snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid,
		    if_name(sc->sc_carpdev));

		CARP_LOG("%s: %s -> %s (%s)\n", subsys,
		    carp_states[sc->sc_state], carp_states[state], reason);

		sc->sc_state = state;

		devctl_notify("CARP", subsys, carp_states[state], NULL);
	}
}

/*
 * carp_linkstate: re-evaluate every vhid on 'ifp' via carp_sc_state(),
 * taking each softc lock in turn under the CIF lock.
 */
static void
carp_linkstate(struct ifnet *ifp)
{
	struct carp_softc *sc;

	CIF_LOCK(ifp->if_carp);
	IFNET_FOREACH_CARP(ifp, sc) {
		CARP_LOCK(sc);
		carp_sc_state(sc);
		CARP_UNLOCK(sc);
	}
	CIF_UNLOCK(ifp->if_carp);
}

/*
 * carp_sc_state: bring a vhid in line with the state of its parent
 * interface and the carp_allow setting.  When the interface is unusable
 * or CARP is disallowed, all timers are stopped, routes are removed and
 * the vhid is marked suppressed (demoting once); otherwise the vhid is
 * restarted from INIT and any suppression is lifted.  Caller holds the
 * softc lock.
 */
static void
carp_sc_state(struct carp_softc *sc)
{

	CARP_LOCK_ASSERT(sc);

	if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
	    !(sc->sc_carpdev->if_flags & IFF_UP) ||
	    !V_carp_allow) {
		callout_stop(&sc->sc_ad_tmo);
#ifdef INET
		callout_stop(&sc->sc_md_tmo);
#endif
#ifdef INET6
		callout_stop(&sc->sc_md6_tmo);
#endif
		carp_set_state(sc, INIT, "hardware interface down");
		carp_setrun(sc, 0);
		carp_delroute(sc);
		/* Demote only on the first transition into suppression. */
		if (!sc->sc_suppress)
			carp_demote_adj(V_carp_ifdown_adj, "interface down");
		sc->sc_suppress = 1;
	} else {
		carp_set_state(sc,
INIT, "hardware interface up");
		carp_setrun(sc, 0);
		/* Give back the demotion added while suppressed. */
		if (sc->sc_suppress)
			carp_demote_adj(-V_carp_ifdown_adj, "interface up");
		sc->sc_suppress = 0;
	}
}

/*
 * carp_demote_adj: add 'adj' (may be negative) to the demotion counter,
 * log the change with 'reason' and queue carp_sendall_task.
 */
static void
carp_demote_adj(int adj, char *reason)
{
	atomic_add_int(&V_carp_demotion, adj);
	CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason);
	taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
}

/*
 * carp_allow_sysctl: sysctl handler for the carp_allow setting.  When
 * the value changes, every softc whose interface belongs to the current
 * vnet is re-evaluated with carp_sc_state().
 */
static int
carp_allow_sysctl(SYSCTL_HANDLER_ARGS)
{
	int new, error;
	struct carp_softc *sc;

	new = V_carp_allow;
	error = sysctl_handle_int(oidp, &new, 0, req);
	/* Error, or a read-only request: nothing to apply. */
	if (error || !req->newptr)
		return (error);

	if (V_carp_allow != new) {
		V_carp_allow = new;

		mtx_lock(&carp_mtx);
		LIST_FOREACH(sc, &carp_list, sc_next) {
			CARP_LOCK(sc);
			/* carp_list is global; skip other vnets. */
			if (curvnet == sc->sc_carpdev->if_vnet)
				carp_sc_state(sc);
			CARP_UNLOCK(sc);
		}
		mtx_unlock(&carp_mtx);
	}

	return (0);
}

/*
 * carp_dscp_sysctl: sysctl handler for the DSCP setting; accepts only
 * values in the range 0..63 and returns EINVAL otherwise.
 */
static int
carp_dscp_sysctl(SYSCTL_HANDLER_ARGS)
{
	int new, error;

	new = V_carp_dscp;
	error = sysctl_handle_int(oidp, &new, 0, req);
	if (error || !req->newptr)
		return (error);

	if (new < 0 || new > 63)
		return (EINVAL);

	V_carp_dscp = new;

	return (0);
}

/*
 * carp_demote_adj_sysctl: sysctl handler for the demotion counter.  The
 * written value is passed to carp_demote_adj() as an adjustment, i.e. it
 * is added to the current counter rather than replacing it.
 */
static int
carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS)
{
	int new, error;

	new = V_carp_demotion;
	error = sysctl_handle_int(oidp, &new, 0, req);
	if (error || !req->newptr)
		return (error);

	carp_demote_adj(new, "sysctl");

	return (0);
}

/*
 * nlattr_get_carp_key: netlink attribute callback that copies the
 * attribute payload into 'target'.  Payloads longer than CARP_KEY_LEN
 * are rejected with EINVAL; shorter payloads copy only their own length
 * and leave the rest of the target untouched.
 */
static int
nlattr_get_carp_key(struct nlattr *nla, struct nl_pstate *npt,
    const void *arg, void *target)
{
	if (__predict_false(NLA_DATA_LEN(nla) > CARP_KEY_LEN))
		return (EINVAL);

	memcpy(target, NLA_DATA_CONST(nla), NLA_DATA_LEN(nla));
	return (0);
}

/* Context handed to carp_nl_send() through its opaque 'arg'. */
struct carp_nl_send_args {
	struct nlmsghdr *hdr;	/* request header being replied to */
	struct nl_pstate *npt;	/* parser state; supplies the writer */
};

/*
 * carp_nl_send: output callback that emits one netlink reply message
 * describing 'sc'.  Returns false (after aborting the message) when the
 * writer cannot hold it, true otherwise.
 */
static bool
carp_nl_send(void *arg, struct carp_softc *sc, int priv)
{
	struct carp_nl_send_args *nlsa = arg;
	struct nlmsghdr *hdr = nlsa->hdr;
	struct nl_pstate *npt = nlsa->npt;
	struct nl_writer *nw = npt->nw;
	struct genlmsghdr *ghdr_new;

	if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) {
		nlmsg_abort(nw);
		return (false);
	}

	ghdr_new = nlmsg_reserve_object(nw, struct
genlmsghdr);
	if (ghdr_new == NULL) {
		nlmsg_abort(nw);
		return (false);
	}

	ghdr_new->cmd = CARP_NL_CMD_GET;
	ghdr_new->version = 0;
	ghdr_new->reserved = 0;

	/* Read the softc under its lock while adding the attributes. */
	CARP_LOCK(sc);

	nlattr_add_u32(nw, CARP_NL_VHID, sc->sc_vhid);
	nlattr_add_u32(nw, CARP_NL_STATE, sc->sc_state);
	nlattr_add_u8(nw, CARP_NL_VERSION, sc->sc_version);
	switch (sc->sc_version) {
	case CARP_VERSION_CARP:
		nlattr_add_s32(nw, CARP_NL_ADVBASE, sc->sc_advbase);
		nlattr_add_s32(nw, CARP_NL_ADVSKEW, sc->sc_advskew);
		nlattr_add_in_addr(nw, CARP_NL_ADDR, &sc->sc_carpaddr);
		nlattr_add_in6_addr(nw, CARP_NL_ADDR6, &sc->sc_carpaddr6);
		/* The key is only disclosed to privileged requesters. */
		if (priv)
			nlattr_add(nw, CARP_NL_KEY, sizeof(sc->sc_key),
			    sc->sc_key);
		break;
	case CARP_VERSION_VRRPv3:
		nlattr_add_u8(nw, CARP_NL_VRRP_PRIORITY, sc->sc_vrrp_prio);
		nlattr_add_u16(nw, CARP_NL_VRRP_ADV_INTER,
		    sc->sc_vrrp_adv_inter);
		break;
	}

	CARP_UNLOCK(sc);

	if (! nlmsg_end(nw)) {
		nlmsg_abort(nw);
		return (false);
	}

	return (true);
}

/*
 * Attributes of a CARP netlink request after parsing.  Callers
 * zero-initialise it, so an attribute missing from the message reads
 * as 0 / NULL.
 */
struct nl_carp_parsed {
	unsigned int	ifindex;	/* CARP_NL_IFINDEX */
	char		*ifname;	/* CARP_NL_IFNAME */
	uint32_t	state;		/* CARP_NL_STATE */
	uint32_t	vhid;		/* CARP_NL_VHID */
	int32_t		advbase;	/* CARP_NL_ADVBASE */
	int32_t		advskew;	/* CARP_NL_ADVSKEW */
	char		key[CARP_KEY_LEN];	/* CARP_NL_KEY */
	struct in_addr	addr;		/* CARP_NL_ADDR */
	struct in6_addr	addr6;		/* CARP_NL_ADDR6 */
	carp_version_t	version;	/* CARP_NL_VERSION */
	uint8_t		vrrp_prio;	/* CARP_NL_VRRP_PRIORITY */
	uint16_t	vrrp_adv_inter;	/* CARP_NL_VRRP_ADV_INTER */
};

#define	_IN(_field)	offsetof(struct genlmsghdr, _field)
#define	_OUT(_field)	offsetof(struct nl_carp_parsed, _field)

/* Maps each netlink attribute type to a field of struct nl_carp_parsed. */
static const struct nlattr_parser nla_p_set[] = {
	{ .type = CARP_NL_VHID, .off = _OUT(vhid), .cb = nlattr_get_uint32 },
	{ .type = CARP_NL_STATE, .off = _OUT(state), .cb = nlattr_get_uint32 },
	{ .type = CARP_NL_ADVBASE, .off = _OUT(advbase), .cb = nlattr_get_uint32 },
	{ .type = CARP_NL_ADVSKEW, .off = _OUT(advskew), .cb = nlattr_get_uint32 },
	{ .type = CARP_NL_KEY, .off = _OUT(key), .cb = nlattr_get_carp_key },
	{ .type = CARP_NL_IFINDEX, .off = _OUT(ifindex), .cb = nlattr_get_uint32 },
	{ .type = CARP_NL_ADDR, .off = _OUT(addr), .cb = nlattr_get_in_addr },
	{ .type = CARP_NL_ADDR6, .off = _OUT(addr6), .cb = nlattr_get_in6_addr },
	{ .type = CARP_NL_IFNAME, .off = _OUT(ifname), .cb =
nlattr_get_string },
	{ .type = CARP_NL_VERSION, .off = _OUT(version), .cb = nlattr_get_uint8 },
	{ .type = CARP_NL_VRRP_PRIORITY, .off = _OUT(vrrp_prio), .cb = nlattr_get_uint8 },
	{ .type = CARP_NL_VRRP_ADV_INTER, .off = _OUT(vrrp_adv_inter), .cb = nlattr_get_uint16 },
};
/* No fixed header fields are parsed; everything arrives as attributes. */
static const struct nlfield_parser nlf_p_set[] = {
};
NL_DECLARE_PARSER(carp_parser, struct genlmsghdr, nlf_p_set, nla_p_set);
#undef _IN
#undef _OUT


/*
 * carp_nl_get: netlink CARP_NL_CMD_GET handler.  The interface is
 * selected by name if given, else by index.  The vhid attribute picks
 * one vhid, and 0 (attribute absent) reports all of them.  Each vhid is
 * written as a separate reply message by carp_nl_send().
 */
static int
carp_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt)
{
	struct nl_carp_parsed attrs = { };
	struct carp_nl_send_args args;
	struct carpreq carpr = { };
	struct epoch_tracker et;
	if_t ifp = NULL;
	int error;

	error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs);
	if (error != 0)
		return (error);

	/* Look the interface up and take a reference (dropped at 'out'). */
	NET_EPOCH_ENTER(et);
	if (attrs.ifname != NULL)
		ifp = ifunit_ref(attrs.ifname);
	else if (attrs.ifindex != 0)
		ifp = ifnet_byindex_ref(attrs.ifindex);
	NET_EPOCH_EXIT(et);

	/* ifp may still be NULL here; the check below receives it as is. */
	if ((error = carp_is_supported_if(ifp)) != 0)
		goto out;

	hdr->nlmsg_flags |= NLM_F_MULTI;
	args.hdr = hdr;
	args.npt = npt;

	carpr.carpr_vhid = attrs.vhid;
	/* No caller-side limit applies: allow the maximum count. */
	carpr.carpr_count = CARP_MAXVHID;

	sx_xlock(&carp_sx);
	error = carp_ioctl_get(ifp, nlp_get_cred(npt->nlp), &carpr,
	    carp_nl_send, &args);
	sx_xunlock(&carp_sx);

	if (!
nlmsg_end_dump(npt->nw, error, hdr))
		error = ENOMEM;

out:
	if (ifp != NULL)
		if_rele(ifp);

	return (error);
}

/*
 * carp_nl_set: netlink CARP_NL_CMD_SET handler.  Validates the parsed
 * attributes, resolves the interface (by name if given, else by index),
 * builds a struct carpkreq and applies it with carp_ioctl_set() under
 * carp_sx.  Returns EINVAL for out-of-range values, EADDRNOTAVAIL for a
 * non-multicast interface, or the result of carp_ioctl_set().
 */
static int
carp_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt)
{
	struct nl_carp_parsed attrs = { };
	struct carpkreq carpr;
	struct epoch_tracker et;
	if_t ifp = NULL;
	int error;

	error = nl_parse_nlmsg(hdr, &carp_parser, npt, &attrs);
	if (error != 0)
		return (error);

	if (attrs.vhid <= 0 || attrs.vhid > CARP_MAXVHID)
		return (EINVAL);
	if (attrs.state > CARP_MAXSTATE)
		return (EINVAL);
	if (attrs.version == 0)	/* compat with pre-VRRPv3 */
		attrs.version = CARP_VERSION_CARP;
	switch (attrs.version) {
	case CARP_VERSION_CARP:
		if (attrs.advbase < 0 || attrs.advskew < 0)
			return (EINVAL);
		if (attrs.advbase > 255)
			return (EINVAL);
		if (attrs.advskew >= 255)
			return (EINVAL);
		break;
	case CARP_VERSION_VRRPv3:
		if (attrs.vrrp_adv_inter > VRRP_MAX_INTERVAL)
			return (EINVAL);
		break;
	default:
		return (EINVAL);
	}

	/* Look the interface up and take a reference (dropped at 'out'). */
	NET_EPOCH_ENTER(et);
	if (attrs.ifname != NULL)
		ifp = ifunit_ref(attrs.ifname);
	else if (attrs.ifindex != 0)
		ifp = ifnet_byindex_ref(attrs.ifindex);
	NET_EPOCH_EXIT(et);

	if ((error = carp_is_supported_if(ifp)) != 0)
		goto out;

	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
		error = EADDRNOTAVAIL;
		goto out;
	}

	/*
	 * 'carpr' is not zeroed: only the common fields and those of the
	 * selected version are filled in.
	 */
	carpr.carpr_count = 1;
	carpr.carpr_vhid = attrs.vhid;
	carpr.carpr_state = attrs.state;
	carpr.carpr_version = attrs.version;
	switch (attrs.version) {
	case CARP_VERSION_CARP:
		carpr.carpr_advbase = attrs.advbase;
		carpr.carpr_advskew = attrs.advskew;
		carpr.carpr_addr = attrs.addr;
		carpr.carpr_addr6 = attrs.addr6;
		memcpy(&carpr.carpr_key, &attrs.key, sizeof(attrs.key));
		break;
	case CARP_VERSION_VRRPv3:
		carpr.carpr_vrrp_priority = attrs.vrrp_prio;
		carpr.carpr_vrrp_adv_inter = attrs.vrrp_adv_inter;
		break;
	}

	sx_xlock(&carp_sx);
	error = carp_ioctl_set(ifp, &carpr);
	sx_xunlock(&carp_sx);

out:
	if (ifp != NULL)
		if_rele(ifp);

	return (error);
}

/* Parsers checked by NL_VERIFY_PARSERS() at registration time. */
static const struct nlhdr_parser *all_parsers[] = { &carp_parser };

/* Generic netlink commands exported by the CARP family. */
static const struct genl_cmd carp_cmds[] = {
	{
		.cmd_num = CARP_NL_CMD_GET,
.cmd_name = "SIOCGVH",
		.cmd_cb = carp_nl_get,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP |
		    GENL_CMD_CAP_HASPOL,
	},
	{
		.cmd_num = CARP_NL_CMD_SET,
		.cmd_name = "SIOCSVH",
		.cmd_cb = carp_nl_set,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
		/* Only the SET command carries a privilege requirement. */
		.cmd_priv = PRIV_NETINET_CARP,
	},
};

/*
 * carp_nl_register: register the CARP generic netlink family and its
 * commands.  The results are only checked by MPASS assertions.
 */
static void
carp_nl_register(void)
{
	bool ret __diagused;
	int family_id __diagused;

	NL_VERIFY_PARSERS(all_parsers);
	family_id = genl_register_family(CARP_NL_FAMILY_NAME, 0, 2,
	    CARP_NL_CMD_MAX);
	MPASS(family_id != 0);

	ret = genl_register_cmds(CARP_NL_FAMILY_NAME, carp_cmds,
-	    NL_ARRAY_LEN(carp_cmds));
+	    nitems(carp_cmds));
	MPASS(ret);
}

/* carp_nl_unregister: remove the CARP generic netlink family. */
static void
carp_nl_unregister(void)
{
	genl_unregister_family(CARP_NL_FAMILY_NAME);
}

/*
 * carp_mod_cleanup: undo carp_mod_load().  Unregisters netlink and the
 * protocol handlers, clears every exported hook pointer, drains the
 * send-all task and destroys the global locks.
 *
 * The function unlocks carp_mtx before destroying it, so it must be
 * entered with carp_mtx held.
 */
static void
carp_mod_cleanup(void)
{

	carp_nl_unregister();

#ifdef INET
	(void)ipproto_unregister(IPPROTO_CARP);
	carp_iamatch_p = NULL;
#endif
#ifdef INET6
	(void)ip6proto_unregister(IPPROTO_CARP);
	carp_iamatch6_p = NULL;
	carp_macmatch6_p = NULL;
#endif
	carp_ioctl_p = NULL;
	carp_attach_p = NULL;
	carp_detach_p = NULL;
	carp_get_vhid_p = NULL;
	carp_linkstate_p = NULL;
	carp_forus_p = NULL;
	carp_output_p = NULL;
	carp_demote_adj_p = NULL;
	carp_master_p = NULL;
	mtx_unlock(&carp_mtx);
	taskqueue_drain(taskqueue_swi, &carp_sendall_task);
	mtx_destroy(&carp_mtx);
	sx_destroy(&carp_sx);
}

/* ipcarp_sysinit: per-vnet initialisation; fetches the allow tunable. */
static void
ipcarp_sysinit(void)
{

	/* Load allow as tunable so to postpone carp start after module load */
	TUNABLE_INT_FETCH("net.inet.carp.allow", &V_carp_allow);
}
VNET_SYSINIT(ip_carp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipcarp_sysinit, NULL);

/*
 * carp_mod_load: module load handler.  Initialises the global locks and
 * list, publishes the hook pointers, registers the protocol handlers and
 * finally the netlink family.  Returns 0 or the registration error.
 */
static int
carp_mod_load(void)
{
	int err;

	mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
	sx_init(&carp_sx, "carp_sx");
	LIST_INIT(&carp_list);
	carp_get_vhid_p = carp_get_vhid;
	carp_forus_p = carp_forus;
	carp_output_p = carp_output;
	carp_linkstate_p = carp_linkstate;
	carp_ioctl_p = carp_ioctl;
	carp_attach_p = carp_attach;
	carp_detach_p = carp_detach;
	carp_demote_adj_p = carp_demote_adj;
	carp_master_p = carp_master;
#ifdef INET6
	carp_iamatch6_p = carp_iamatch6;
carp_macmatch6_p = carp_macmatch6; err = ip6proto_register(IPPROTO_CARP, carp6_input, NULL); if (err) { printf("carp: error %d registering with INET6\n", err); carp_mod_cleanup(); return (err); } #endif #ifdef INET carp_iamatch_p = carp_iamatch; err = ipproto_register(IPPROTO_CARP, carp_input, NULL); if (err) { printf("carp: error %d registering with INET\n", err); carp_mod_cleanup(); return (err); } #endif carp_nl_register(); return (0); } static int carp_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: return carp_mod_load(); /* NOTREACHED */ case MOD_UNLOAD: mtx_lock(&carp_mtx); if (LIST_EMPTY(&carp_list)) carp_mod_cleanup(); else { mtx_unlock(&carp_mtx); return (EBUSY); } break; default: return (EINVAL); } return (0); } static moduledata_t carp_mod = { "carp", carp_modevent, 0 }; DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); diff --git a/sys/netlink/netlink.h b/sys/netlink/netlink.h index 2cffa7299ef4..5724385271fc 100644 --- a/sys/netlink/netlink.h +++ b/sys/netlink/netlink.h @@ -1,267 +1,264 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (C) The Internet Society (2003). All Rights Reserved. * * This document and translations of it may be copied and furnished to * others, and derivative works that comment on or otherwise explain it * or assist in its implementation may be prepared, copied, published * and distributed, in whole or in part, without restriction of any * kind, provided that the above copyright notice and this paragraph are * included on all such copies and derivative works. However, this * document itself may not be modified in any way, such as by removing * the copyright notice or references to the Internet Society or other * Internet organizations, except as needed for the purpose of * developing Internet standards in which case the procedures for * copyrights defined in the Internet Standards process must be * followed, or as required to translate it into languages other than * English. * * The limited permissions granted above are perpetual and will not be * revoked by the Internet Society or its successors or assignees. * * This document and the information contained herein is provided on an * "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING * TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING * BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION * HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
*/ /* * This file contains structures and constants for RFC 3549 (Netlink) * protocol. Some values have been taken from Linux implementation. */ #ifndef _NETLINK_NETLINK_H_ #define _NETLINK_NETLINK_H_ -#include +#include #include struct sockaddr_nl { uint8_t nl_len; /* sizeof(sockaddr_nl) */ sa_family_t nl_family; /* netlink family */ uint16_t nl_pad; /* reserved, set to 0 */ uint32_t nl_pid; /* desired port ID, 0 for auto-select */ uint32_t nl_groups; /* multicast groups mask to bind to */ }; #define SOL_NETLINK 270 /* Netlink socket options */ #define NETLINK_ADD_MEMBERSHIP 1 /* Subscribe for the specified group notifications */ #define NETLINK_DROP_MEMBERSHIP 2 /* Unsubscribe from the specified group */ #define NETLINK_PKTINFO 3 /* XXX: not supported */ #define NETLINK_BROADCAST_ERROR 4 /* XXX: not supported */ #define NETLINK_NO_ENOBUFS 5 /* XXX: not supported */ #define NETLINK_RX_RING 6 /* XXX: not supported */ #define NETLINK_TX_RING 7 /* XXX: not supported */ #define NETLINK_LISTEN_ALL_NSID 8 /* XXX: not supported */ #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 /* Send only original message header in the reply */ #define NETLINK_EXT_ACK 11 /* Ack support for receiving additional TLVs in ack */ #define NETLINK_GET_STRICT_CHK 12 /* Strict header checking */ #define NETLINK_MSG_INFO 257 /* (FreeBSD-specific) Receive message originator data in cmsg */ /* * RFC 3549, 2.3.2 Netlink Message Header */ struct nlmsghdr { uint32_t nlmsg_len; /* Length of message including header */ uint16_t nlmsg_type; /* Message type identifier */ uint16_t nlmsg_flags; /* Flags (NLM_F_) */ uint32_t nlmsg_seq; /* Sequence number */ uint32_t nlmsg_pid; /* Sending process port ID */ }; /* * RFC 3549, 2.3.2 standard flag bits (nlmsg_flags) */ #define NLM_F_REQUEST 0x01 /* Indicates request to kernel */ #define NLM_F_MULTI 0x02 /* Message is part of a group terminated by NLMSG_DONE msg */ #define NLM_F_ACK 0x04 /* Reply with ack message containing resulting error code */
#define NLM_F_ECHO 0x08 /* (not supported) Echo this request back */ #define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */ #define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */ /* * RFC 3549, 2.3.2 Additional flag bits for GET requests */ #define NLM_F_ROOT 0x100 /* Return the complete table */ #define NLM_F_MATCH 0x200 /* Return all entries matching criteria */ #define NLM_F_ATOMIC 0x400 /* Return an atomic snapshot (ignored) */ #define NLM_F_DUMP (NLM_F_ROOT | NLM_F_MATCH) /* * RFC 3549, 2.3.2 Additional flag bits for NEW requests */ #define NLM_F_REPLACE 0x100 /* Replace existing matching config object */ #define NLM_F_EXCL 0x200 /* Don't replace the object if it exists */ #define NLM_F_CREATE 0x400 /* Create if it does not exist */ #define NLM_F_APPEND 0x800 /* Add to end of list */ /* Modifiers to DELETE requests */ #define NLM_F_NONREC 0x100 /* Do not delete recursively */ /* Flags for ACK message */ #define NLM_F_CAPPED 0x100 /* request was capped */ #define NLM_F_ACK_TLVS 0x200 /* extended ACK TLVs were included */ /* * RFC 3549, 2.3.2 standard message types (nlmsg_type). */ #define NLMSG_NOOP 0x1 /* Message is ignored. */ #define NLMSG_ERROR 0x2 /* reply error code reporting */ #define NLMSG_DONE 0x3 /* Message terminates a multipart message. */ #define NLMSG_OVERRUN 0x4 /* overrun detected, data is lost */ #define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */ /* * Definition of numbers assigned to the netlink subsystems.
*/ #define NETLINK_ROUTE 0 /* Routing/device hook */ #define NETLINK_UNUSED 1 /* not supported */ #define NETLINK_USERSOCK 2 /* not supported */ #define NETLINK_FIREWALL 3 /* not supported */ #define NETLINK_SOCK_DIAG 4 /* not supported */ #define NETLINK_NFLOG 5 /* not supported */ #define NETLINK_XFRM 6 /* (not supported) PF_SETKEY */ #define NETLINK_SELINUX 7 /* not supported */ #define NETLINK_ISCSI 8 /* not supported */ #define NETLINK_AUDIT 9 /* not supported */ #define NETLINK_FIB_LOOKUP 10 /* not supported */ #define NETLINK_CONNECTOR 11 /* not supported */ #define NETLINK_NETFILTER 12 /* not supported */ #define NETLINK_IP6_FW 13 /* not supported */ #define NETLINK_DNRTMSG 14 /* not supported */ #define NETLINK_KOBJECT_UEVENT 15 /* not supported */ #define NETLINK_GENERIC 16 /* Generic netlink (dynamic families) */ /* * RFC 3549, 2.3.2.2 The ACK Netlink Message */ struct nlmsgerr { int error; struct nlmsghdr msg; }; enum nlmsgerr_attrs { NLMSGERR_ATTR_UNUSED, NLMSGERR_ATTR_MSG = 1, /* string, error message */ NLMSGERR_ATTR_OFFS = 2, /* u32, offset of the invalid attr from nl header */ NLMSGERR_ATTR_COOKIE = 3, /* binary, data to pass to userland */ NLMSGERR_ATTR_POLICY = 4, /* not supported */ __NLMSGERR_ATTR_MAX, NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 }; /* FreeBSD-specific debugging info */ enum nlmsginfo_attrs { NLMSGINFO_ATTR_UNUSED, NLMSGINFO_ATTR_PROCESS_ID = 1, /* u32, source process PID */ NLMSGINFO_ATTR_PORT_ID = 2, /* u32, source socket nl_pid */ NLMSGINFO_ATTR_SEQ_ID = 3, /* u32, source message seq_id */ }; -#ifndef roundup2 -#define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ -#endif #define NL_ITEM_ALIGN_SIZE sizeof(uint32_t) #define NL_ITEM_ALIGN(_len) roundup2(_len, NL_ITEM_ALIGN_SIZE) #define NL_ITEM_DATA(_ptr, _off) ((void *)((char *)(_ptr) + _off)) #define NL_ITEM_DATA_CONST(_ptr, _off) ((const void *)((const char *)(_ptr) + _off)) #define NL_ITEM_OK(_ptr, _len, _hlen, _LEN_M) \ ((_len) >= _hlen &&
_LEN_M(_ptr) >= _hlen && _LEN_M(_ptr) <= (_len)) #define NL_ITEM_NEXT(_ptr, _LEN_M) ((__typeof(_ptr))((char *)(_ptr) + _LEN_M(_ptr))) #define NL_ITEM_ITER(_ptr, _len, _LEN_MACRO) \ ((_len) -= _LEN_MACRO(_ptr), NL_ITEM_NEXT(_ptr, _LEN_MACRO)) #ifndef _KERNEL /* part of netlink(3) API */ #define NLMSG_ALIGNTO NL_ITEM_ALIGN_SIZE #define NLMSG_ALIGN(_len) NL_ITEM_ALIGN(_len) #define NLMSG_HDRLEN ((int)sizeof(struct nlmsghdr)) #define NLMSG_LENGTH(_len) ((_len) + NLMSG_HDRLEN) #define NLMSG_SPACE(_len) NLMSG_ALIGN(NLMSG_LENGTH(_len)) #define NLMSG_DATA(_hdr) NL_ITEM_DATA(_hdr, NLMSG_HDRLEN) #define _NLMSG_LEN(_hdr) ((int)(_hdr)->nlmsg_len) #define _NLMSG_ALIGNED_LEN(_hdr) NLMSG_ALIGN(_NLMSG_LEN(_hdr)) #define NLMSG_OK(_hdr, _len) NL_ITEM_OK(_hdr, _len, NLMSG_HDRLEN, _NLMSG_LEN) #define NLMSG_PAYLOAD(_hdr,_len) (_NLMSG_LEN(_hdr) - NLMSG_SPACE((_len))) #define NLMSG_NEXT(_hdr, _len) NL_ITEM_ITER(_hdr, _len, _NLMSG_ALIGNED_LEN) #else #define NLMSG_ALIGNTO 4U #define NLMSG_ALIGN(len) (((len) + NLMSG_ALIGNTO - 1) & ~(NLMSG_ALIGNTO - 1)) #define NLMSG_HDRLEN ((int)NLMSG_ALIGN(sizeof(struct nlmsghdr))) #endif /* * Base netlink attribute TLV header. */ struct nlattr { uint16_t nla_len; /* Total attribute length */ uint16_t nla_type; /* Attribute type */ }; /* * * nla_type field encoding: * * 0 1 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |N|O| Attribute type | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * N - attribute contains other attributes (mostly unused) * O - encoded in network byte order (mostly unused) * Note: N & O are mutually exclusive * * Note: attribute type value scope normally is either parent attribute * or the message/message group.
*/ #define NLA_F_NESTED (1 << 15) #define NLA_F_NET_BYTEORDER (1 << 14) #define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER) #ifndef _KERNEL #define NLA_ALIGNTO NL_ITEM_ALIGN_SIZE #define NLA_ALIGN(_len) NL_ITEM_ALIGN(_len) #define NLA_HDRLEN ((int)sizeof(struct nlattr)) #endif #endif diff --git a/sys/netlink/netlink_ctl.h b/sys/netlink/netlink_ctl.h index c2ac3e99421e..95b79c763ccd 100644 --- a/sys/netlink/netlink_ctl.h +++ b/sys/netlink/netlink_ctl.h @@ -1,129 +1,127 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
*/ #ifndef _NETLINK_NETLINK_CTL_H_ #define _NETLINK_NETLINK_CTL_H_ #ifdef _KERNEL /* * This file provides headers for the public KPI of the netlink * subsystem */ #include MALLOC_DECLARE(M_NETLINK); /* * Macro for handling attribute TLVs */ #define _roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) #define NETLINK_ALIGN_SIZE sizeof(uint32_t) #define NETLINK_ALIGN(_len) _roundup2(_len, NETLINK_ALIGN_SIZE) #define NLA_ALIGN_SIZE sizeof(uint32_t) #define NLA_ALIGN(_len) _roundup2(_len, NLA_ALIGN_SIZE) #define NLA_HDRLEN ((int)sizeof(struct nlattr)) #define NLA_DATA_LEN(_nla) ((int)((_nla)->nla_len - NLA_HDRLEN)) #define NLA_DATA(_nla) NL_ITEM_DATA(_nla, NLA_HDRLEN) #define NLA_DATA_CONST(_nla) NL_ITEM_DATA_CONST(_nla, NLA_HDRLEN) #define NLA_TYPE(_nla) ((_nla)->nla_type & 0x3FFF) #ifndef typeof #define typeof __typeof #endif #define NLA_NEXT(_attr) (struct nlattr *)((char *)_attr + NLA_ALIGN(_attr->nla_len)) #define _NLA_END(_start, _len) ((char *)(_start) + (_len)) #define NLA_FOREACH(_attr, _start, _len) \ for (typeof(_attr) _end = (typeof(_attr))_NLA_END(_start, _len), _attr = (_start); \ ((char *)_attr < (char *)_end) && \ ((char *)NLA_NEXT(_attr) <= (char *)_end); \ _attr = (_len -= NLA_ALIGN(_attr->nla_len), NLA_NEXT(_attr))) -#define NL_ARRAY_LEN(_a) (sizeof(_a) / sizeof((_a)[0])) - #include #include /* Protocol handlers */ struct nl_pstate; typedef int (*nl_handler_f)(struct nlmsghdr *hdr, struct nl_pstate *npt); bool netlink_register_proto(int proto, const char *proto_name, nl_handler_f handler); bool netlink_unregister_proto(int proto); /* Common helpers */ bool nl_has_listeners(uint16_t netlink_family, uint32_t groups_mask); bool nlp_has_priv(struct nlpcb *nlp, int priv); struct ucred *nlp_get_cred(struct nlpcb *nlp); uint32_t nlp_get_pid(const struct nlpcb *nlp); bool nlp_unconstrained_vnet(const struct nlpcb *nlp); /* netlink_generic.c */ struct genl_cmd { const char *cmd_name; nl_handler_f cmd_cb; uint32_t cmd_flags; uint32_t cmd_priv; uint32_t cmd_num; }; 
uint32_t genl_register_family(const char *family_name, size_t hdrsize, uint16_t family_version, uint16_t max_attr_idx); bool genl_unregister_family(const char *family_name); bool genl_register_cmds(const char *family_name, const struct genl_cmd *cmds, int count); uint32_t genl_register_group(const char *family_name, const char *group_name); struct genl_family; const char *genl_get_family_name(const struct genl_family *gf); uint16_t genl_get_family_id(const struct genl_family *gf); typedef void (*genl_family_event_handler_t)(void *arg, const struct genl_family *gf, int action); EVENTHANDLER_DECLARE(genl_family_event, genl_family_event_handler_t); struct thread; #if defined(NETLINK) || defined(NETLINK_MODULE) /* Provide optimized calls to the functions inside the same linking unit */ struct nlpcb *_nl_get_thread_nlp(struct thread *td); static inline struct nlpcb * nl_get_thread_nlp(struct thread *td) { return (_nl_get_thread_nlp(td)); } #else /* Provide access to the functions via netlink_glue.c */ struct nlpcb *nl_get_thread_nlp(struct thread *td); #endif /* defined(NETLINK) || defined(NETLINK_MODULE) */ #endif #endif diff --git a/sys/netlink/netlink_generic.c b/sys/netlink/netlink_generic.c index 4a3c83da57e1..0f960d79f477 100644 --- a/sys/netlink/netlink_generic.c +++ b/sys/netlink/netlink_generic.c @@ -1,303 +1,303 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_generic #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); static int dump_family(struct nlmsghdr *hdr, struct genlmsghdr *ghdr, const struct genl_family *gf, struct nl_writer *nw); /* * Handler called by netlink subsystem when matching netlink message is received */ static int genl_handle_message(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct nlpcb *nlp = npt->nlp; struct genl_family *gf = NULL; int error = 0; int family_id = (int)hdr->nlmsg_type - GENL_MIN_ID; if (__predict_false(family_id < 0 || (gf = genl_get_family(family_id)) == NULL)) { NLP_LOG(LOG_DEBUG, nlp, "invalid message type: %d", hdr->nlmsg_type); return (ENOTSUP); } if (__predict_false(hdr->nlmsg_len < sizeof(hdr) + GENL_HDRLEN)) { NLP_LOG(LOG_DEBUG, nlp, "invalid message size: %d", hdr->nlmsg_len); return (EINVAL); } struct genlmsghdr *ghdr = (struct genlmsghdr *)(hdr + 1); if (ghdr->cmd >= gf->family_cmd_size || gf->family_cmds[ghdr->cmd].cmd_cb == NULL) { NLP_LOG(LOG_DEBUG, nlp, "family %s: invalid cmd %d", gf->family_name, ghdr->cmd); return (ENOTSUP); } struct 
genl_cmd *cmd = &gf->family_cmds[ghdr->cmd]; if (cmd->cmd_priv != 0 && !nlp_has_priv(nlp, cmd->cmd_priv)) { NLP_LOG(LOG_DEBUG, nlp, "family %s: cmd %d priv_check() failed", gf->family_name, ghdr->cmd); return (EPERM); } NLP_LOG(LOG_DEBUG2, nlp, "received family %s cmd %s(%d) len %d", gf->family_name, cmd->cmd_name, ghdr->cmd, hdr->nlmsg_len); error = cmd->cmd_cb(hdr, npt); return (error); } static uint32_t get_cmd_flags(const struct genl_cmd *cmd) { uint32_t flags = cmd->cmd_flags; if (cmd->cmd_priv != 0) flags |= GENL_ADMIN_PERM; return (flags); } static int dump_family(struct nlmsghdr *hdr, struct genlmsghdr *ghdr, const struct genl_family *gf, struct nl_writer *nw) { if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) goto enomem; struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = ghdr->cmd; ghdr_new->version = gf->family_version; ghdr_new->reserved = 0; nlattr_add_string(nw, CTRL_ATTR_FAMILY_NAME, gf->family_name); nlattr_add_u16(nw, CTRL_ATTR_FAMILY_ID, gf->family_id); nlattr_add_u32(nw, CTRL_ATTR_VERSION, gf->family_version); nlattr_add_u32(nw, CTRL_ATTR_HDRSIZE, gf->family_hdrsize); nlattr_add_u32(nw, CTRL_ATTR_MAXATTR, gf->family_attr_max); if (gf->family_cmd_size > 0) { int off = nlattr_add_nested(nw, CTRL_ATTR_OPS); if (off == 0) goto enomem; for (int i = 0, cnt=0; i < gf->family_cmd_size; i++) { struct genl_cmd *cmd = &gf->family_cmds[i]; if (cmd->cmd_cb == NULL) continue; int cmd_off = nlattr_add_nested(nw, ++cnt); if (cmd_off == 0) goto enomem; nlattr_add_u32(nw, CTRL_ATTR_OP_ID, cmd->cmd_num); nlattr_add_u32(nw, CTRL_ATTR_OP_FLAGS, get_cmd_flags(cmd)); nlattr_set_len(nw, cmd_off); } nlattr_set_len(nw, off); } if (gf->family_num_groups > 0) { int off = nlattr_add_nested(nw, CTRL_ATTR_MCAST_GROUPS); if (off == 0) goto enomem; for (int i = 0, cnt = 0; i < MAX_GROUPS; i++) { struct genl_group *gg = genl_get_group(i); if (gg == NULL || gg->group_family != gf) continue; int cmd_off = nlattr_add_nested(nw, ++cnt); if 
(cmd_off == 0) goto enomem; nlattr_add_u32(nw, CTRL_ATTR_MCAST_GRP_ID, i + MIN_GROUP_NUM); nlattr_add_string(nw, CTRL_ATTR_MCAST_GRP_NAME, gg->group_name); nlattr_set_len(nw, cmd_off); } nlattr_set_len(nw, off); } if (nlmsg_end(nw)) return (0); enomem: NL_LOG(LOG_DEBUG, "unable to dump family %s state (ENOMEM)", gf->family_name); nlmsg_abort(nw); return (ENOMEM); } /* Declare ourself as a user */ static void nlctrl_notify(void *arg, const struct genl_family *gf, int action); static eventhandler_tag family_event_tag; static uint32_t ctrl_family_id; static uint32_t ctrl_group_id; struct nl_parsed_family { uint32_t family_id; char *family_name; uint8_t version; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct nl_parsed_family, _field) static const struct nlfield_parser nlf_p_generic[] = { { .off_in = _IN(version), .off_out = _OUT(version), .cb = nlf_get_u8 }, }; static struct nlattr_parser nla_p_generic[] = { { .type = CTRL_ATTR_FAMILY_ID , .off = _OUT(family_id), .cb = nlattr_get_uint16 }, { .type = CTRL_ATTR_FAMILY_NAME , .off = _OUT(family_name), .cb = nlattr_get_string }, }; #undef _IN #undef _OUT NL_DECLARE_PARSER(genl_parser, struct genlmsghdr, nlf_p_generic, nla_p_generic); static bool match_family(const struct genl_family *gf, const struct nl_parsed_family *attrs) { if (gf->family_name == NULL) return (false); if (attrs->family_id != 0 && attrs->family_id != gf->family_id) return (false); if (attrs->family_name != NULL && strcmp(attrs->family_name, gf->family_name)) return (false); return (true); } static int nlctrl_handle_getfamily(struct nlmsghdr *hdr, struct nl_pstate *npt) { int error = 0; struct nl_parsed_family attrs = {}; error = nl_parse_nlmsg(hdr, &genl_parser, npt, &attrs); if (error != 0) return (error); struct genlmsghdr ghdr = { .cmd = CTRL_CMD_NEWFAMILY, }; if (attrs.family_id != 0 || attrs.family_name != NULL) { /* Resolve request */ for (int i = 0; i < MAX_FAMILIES; i++) { struct genl_family *gf = 
genl_get_family(i); if (gf != NULL && match_family(gf, &attrs)) { error = dump_family(hdr, &ghdr, gf, npt->nw); return (error); } } return (ENOENT); } hdr->nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI; for (int i = 0; i < MAX_FAMILIES; i++) { struct genl_family *gf = genl_get_family(i); if (gf != NULL && match_family(gf, &attrs)) { error = dump_family(hdr, &ghdr, gf, npt->nw); if (error != 0) break; } } if (!nlmsg_end_dump(npt->nw, error, hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (error); } static void nlctrl_notify(void *arg __unused, const struct genl_family *gf, int cmd) { struct nlmsghdr hdr = {.nlmsg_type = NETLINK_GENERIC }; struct genlmsghdr ghdr = { .cmd = cmd }; struct nl_writer nw; if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_GENERIC, ctrl_group_id, false)) { NL_LOG(LOG_DEBUG, "error allocating group writer"); return; } dump_family(&hdr, &ghdr, gf, &nw); nlmsg_flush(&nw); } static const struct genl_cmd nlctrl_cmds[] = { { .cmd_num = CTRL_CMD_GETFAMILY, .cmd_name = "GETFAMILY", .cmd_cb = nlctrl_handle_getfamily, .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, }, }; static const struct nlhdr_parser *all_parsers[] = { &genl_parser }; static void genl_load_all(void *u __unused) { NL_VERIFY_PARSERS(all_parsers); ctrl_family_id = genl_register_family(CTRL_FAMILY_NAME, 0, 2, CTRL_ATTR_MAX); - genl_register_cmds(CTRL_FAMILY_NAME, nlctrl_cmds, NL_ARRAY_LEN(nlctrl_cmds)); + genl_register_cmds(CTRL_FAMILY_NAME, nlctrl_cmds, nitems(nlctrl_cmds)); ctrl_group_id = genl_register_group(CTRL_FAMILY_NAME, "notify"); family_event_tag = EVENTHANDLER_REGISTER(genl_family_event, nlctrl_notify, NULL, EVENTHANDLER_PRI_ANY); netlink_register_proto(NETLINK_GENERIC, "NETLINK_GENERIC", genl_handle_message); } SYSINIT(genl_load_all, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, genl_load_all, NULL); static void genl_unload(void *u __unused) { netlink_unregister_proto(NETLINK_GENERIC); 
EVENTHANDLER_DEREGISTER(genl_family_event, family_event_tag); genl_unregister_family(CTRL_FAMILY_NAME); NET_EPOCH_WAIT(); } SYSUNINIT(genl_unload, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, genl_unload, NULL); diff --git a/sys/netlink/netlink_message_parser.h b/sys/netlink/netlink_message_parser.h index 49bec646b29e..42a59f45b22a 100644 --- a/sys/netlink/netlink_message_parser.h +++ b/sys/netlink/netlink_message_parser.h @@ -1,333 +1,333 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _NETLINK_NETLINK_MESSAGE_PARSER_H_ #define _NETLINK_NETLINK_MESSAGE_PARSER_H_ #ifdef _KERNEL #include /* * It is not meant to be included directly */ /* Parsing state */ struct linear_buffer { char *base; /* Base allocated memory pointer */ uint32_t offset; /* Currently used offset */ uint32_t size; /* Total buffer size */ } __aligned(_Alignof(__max_align_t)); static inline void * lb_alloc(struct linear_buffer *lb, int len) { len = roundup2(len, _Alignof(__max_align_t)); if (lb->offset + len > lb->size) return (NULL); void *data = (void *)(lb->base + lb->offset); lb->offset += len; return (data); } static inline void lb_clear(struct linear_buffer *lb) { memset(lb->base, 0, lb->size); lb->offset = 0; } #define NL_MAX_ERROR_BUF 128 #define SCRATCH_BUFFER_SIZE (1024 + NL_MAX_ERROR_BUF) struct nl_pstate { struct linear_buffer lb; /* Per-message scratch buffer */ struct nlpcb *nlp; /* Originator socket */ struct nl_writer *nw; /* Message writer to use */ struct nlmsghdr *hdr; /* Current parsed message header */ uint32_t err_off; /* error offset from hdr start */ int error; /* last operation error */ char *err_msg; /* Description of last error */ struct nlattr *cookie; /* NLA to return to the userspace */ bool strict; /* Strict parsing required */ }; static inline void * npt_alloc(struct nl_pstate *npt, int len) { return (lb_alloc(&npt->lb, len)); } #define npt_alloc_sockaddr(_npt, _len) ((struct sockaddr *)(npt_alloc(_npt, _len))) typedef int parse_field_f(void *hdr, struct nl_pstate *npt, void *target); struct nlfield_parser { uint16_t off_in; uint16_t off_out; parse_field_f *cb; }; static const struct nlfield_parser nlf_p_empty[] = {}; int nlf_get_ifp(void *src, struct nl_pstate *npt, void *target); int nlf_get_ifpz(void *src, struct nl_pstate *npt, void *target); int nlf_get_u8(void *src, struct nl_pstate *npt, void *target); int nlf_get_u16(void *src, struct nl_pstate *npt, void *target); int nlf_get_u32(void *src, struct nl_pstate *npt, void *target); 
int nlf_get_u8_u32(void *src, struct nl_pstate *npt, void *target); struct nlattr_parser; typedef int parse_attr_f(struct nlattr *attr, struct nl_pstate *npt, const void *arg, void *target); struct nlattr_parser { uint16_t type; /* Attribute type */ uint16_t off; /* field offset in the target structure */ parse_attr_f *cb; /* parser function to call */ const void *arg; }; typedef bool strict_parser_f(void *hdr, struct nl_pstate *npt); typedef bool post_parser_f(void *parsed_attrs, struct nl_pstate *npt); struct nlhdr_parser { int nl_hdr_off; /* aligned netlink header size */ int out_hdr_off; /* target header size */ int fp_size; int np_size; const struct nlfield_parser *fp; /* array of header field parsers */ const struct nlattr_parser *np; /* array of attribute parsers */ strict_parser_f *sp; /* Pre-parse strict validation function */ post_parser_f *post_parse; }; #define NL_DECLARE_PARSER_EXT(_name, _t, _sp, _fp, _np, _pp) \ static const struct nlhdr_parser _name = { \ .nl_hdr_off = sizeof(_t), \ .fp = &((_fp)[0]), \ .np = &((_np)[0]), \ - .fp_size = NL_ARRAY_LEN(_fp), \ - .np_size = NL_ARRAY_LEN(_np), \ + .fp_size = nitems(_fp), \ + .np_size = nitems(_np), \ .sp = _sp, \ .post_parse = _pp, \ } #define NL_DECLARE_PARSER(_name, _t, _fp, _np) \ NL_DECLARE_PARSER_EXT(_name, _t, NULL, _fp, _np, NULL) #define NL_DECLARE_STRICT_PARSER(_name, _t, _sp, _fp, _np) \ NL_DECLARE_PARSER_EXT(_name, _t, _sp, _fp, _np, NULL) #define NL_DECLARE_ARR_PARSER(_name, _t, _o, _fp, _np) \ static const struct nlhdr_parser _name = { \ .nl_hdr_off = sizeof(_t), \ .out_hdr_off = sizeof(_o), \ .fp = &((_fp)[0]), \ .np = &((_np)[0]), \ - .fp_size = NL_ARRAY_LEN(_fp), \ - .np_size = NL_ARRAY_LEN(_np), \ + .fp_size = nitems(_fp), \ + .np_size = nitems(_np), \ } #define NL_DECLARE_ATTR_PARSER_EXT(_name, _np, _pp) \ static const struct nlhdr_parser _name = { \ .np = &((_np)[0]), \ - .np_size = NL_ARRAY_LEN(_np), \ + .np_size = nitems(_np), \ .post_parse = (_pp) \ } #define 
NL_DECLARE_ATTR_PARSER(_name, _np) \ NL_DECLARE_ATTR_PARSER_EXT(_name, _np, NULL) #define NL_ATTR_BMASK_SIZE 128 BITSET_DEFINE(nlattr_bmask, NL_ATTR_BMASK_SIZE); void nl_get_attrs_bmask_raw(struct nlattr *nla_head, int len, struct nlattr_bmask *bm); bool nl_has_attr(const struct nlattr_bmask *bm, unsigned int nla_type); int nl_parse_attrs_raw(struct nlattr *nla_head, int len, const struct nlattr_parser *ps, int pslen, struct nl_pstate *npt, void *target); int nlattr_get_flag(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_ip(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_bool(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_uint8(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_uint16(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_uint32(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_uint64(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_in_addr(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_in6_addr(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_ifp(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_ifpz(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_ipvia(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_chara(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_string(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_stringn(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_bytes(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); 
int nlattr_get_nla(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_nested(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); int nlattr_get_nested_ptr(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target); bool nlmsg_report_err_msg(struct nl_pstate *npt, const char *fmt, ...); #define NLMSG_REPORT_ERR_MSG(_npt, _fmt, ...) { \ nlmsg_report_err_msg(_npt, _fmt, ## __VA_ARGS__); \ NLP_LOG(LOG_DEBUG, (_npt)->nlp, _fmt, ## __VA_ARGS__); \ } bool nlmsg_report_err_offset(struct nl_pstate *npt, uint32_t off); void nlmsg_report_cookie(struct nl_pstate *npt, struct nlattr *nla); void nlmsg_report_cookie_u32(struct nl_pstate *npt, uint32_t val); /* * Have it inline so compiler can optimize field accesses into * the list of direct function calls without iteration. */ static inline int nl_parse_header(void *hdr, int len, const struct nlhdr_parser *parser, struct nl_pstate *npt, void *target) { int error; if (__predict_false(len < parser->nl_hdr_off)) { if (npt->strict) { nlmsg_report_err_msg(npt, "header too short: expected %d, got %d", parser->nl_hdr_off, len); return (EINVAL); } /* Compat with older applications: pretend there's a full header */ void *tmp_hdr = npt_alloc(npt, parser->nl_hdr_off); if (tmp_hdr == NULL) return (EINVAL); memcpy(tmp_hdr, hdr, len); hdr = tmp_hdr; len = parser->nl_hdr_off; } if (npt->strict && parser->sp != NULL && !parser->sp(hdr, npt)) return (EINVAL); /* Extract fields first */ for (int i = 0; i < parser->fp_size; i++) { const struct nlfield_parser *fp = &parser->fp[i]; void *src = (char *)hdr + fp->off_in; void *dst = (char *)target + fp->off_out; error = fp->cb(src, npt, dst); if (error != 0) return (error); } struct nlattr *nla_head = (struct nlattr *)((char *)hdr + parser->nl_hdr_off); error = nl_parse_attrs_raw(nla_head, len - parser->nl_hdr_off, parser->np, parser->np_size, npt, target); if (parser->post_parse != NULL && error == 0) { if 
(!parser->post_parse(target, npt)) return (EINVAL); } return (error); } static inline int nl_parse_nested(struct nlattr *nla, const struct nlhdr_parser *parser, struct nl_pstate *npt, void *target) { struct nlattr *nla_head = (struct nlattr *)NLA_DATA(nla); return (nl_parse_attrs_raw(nla_head, NLA_DATA_LEN(nla), parser->np, parser->np_size, npt, target)); } /* * Checks that attributes are sorted by attribute type. */ static inline void nl_verify_parsers(const struct nlhdr_parser **parser, int count) { #ifdef INVARIANTS for (int i = 0; i < count; i++) { const struct nlhdr_parser *p = parser[i]; int attr_type = 0; for (int j = 0; j < p->np_size; j++) { MPASS(p->np[j].type > attr_type); attr_type = p->np[j].type; /* Recurse into nested objects. */ if (p->np[j].cb == nlattr_get_nested || p->np[j].cb == nlattr_get_nested_ptr) { const struct nlhdr_parser *np = (const struct nlhdr_parser *)p->np[j].arg; nl_verify_parsers(&np, 1); } } } #endif } void nl_verify_parsers(const struct nlhdr_parser **parser, int count); -#define NL_VERIFY_PARSERS(_p) nl_verify_parsers((_p), NL_ARRAY_LEN(_p)) +#define NL_VERIFY_PARSERS(_p) nl_verify_parsers((_p), nitems(_p)) static inline int nl_parse_nlmsg(struct nlmsghdr *hdr, const struct nlhdr_parser *parser, struct nl_pstate *npt, void *target) { return (nl_parse_header(hdr + 1, hdr->nlmsg_len - sizeof(*hdr), parser, npt, target)); } static inline void nl_get_attrs_bmask_nlmsg(struct nlmsghdr *hdr, const struct nlhdr_parser *parser, struct nlattr_bmask *bm) { struct nlattr *nla_head; nla_head = (struct nlattr *)((char *)(hdr + 1) + parser->nl_hdr_off); int len = hdr->nlmsg_len - sizeof(*hdr) - parser->nl_hdr_off; nl_get_attrs_bmask_raw(nla_head, len, bm); } #endif #endif diff --git a/sys/netlink/netlink_snl.h b/sys/netlink/netlink_snl.h index 7151f6221fca..2861ea93bafb 100644 --- a/sys/netlink/netlink_snl.h +++ b/sys/netlink/netlink_snl.h @@ -1,1320 +1,1319 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. 
Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _NETLINK_NETLINK_SNL_H_ #define _NETLINK_NETLINK_SNL_H_ /* * Simple Netlink Library */ +#include +#include + +#include +#include + #include #include #include #include #include #include #include #include #include -#include -#include -#include -#include - #define _roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) #define NETLINK_ALIGN_SIZE sizeof(uint32_t) #define NETLINK_ALIGN(_len) _roundup2(_len, NETLINK_ALIGN_SIZE) #define NLA_ALIGN_SIZE sizeof(uint32_t) #define NLA_HDRLEN ((int)sizeof(struct nlattr)) #define NLA_DATA_LEN(_nla) ((int)((_nla)->nla_len - NLA_HDRLEN)) #define NLA_DATA(_nla) NL_ITEM_DATA(_nla, NLA_HDRLEN) #define NLA_DATA_CONST(_nla) NL_ITEM_DATA_CONST(_nla, NLA_HDRLEN) #define NLA_TYPE(_nla) ((_nla)->nla_type & 0x3FFF) #define NLA_NEXT(_attr) (struct nlattr *)(void *)((char *)_attr + NLA_ALIGN(_attr->nla_len)) #define _NLA_END(_start, _len) ((char *)(_start) + (_len)) #define NLA_FOREACH(_attr, _start, _len) \ for (_attr = (struct nlattr *)(_start); \ ((char *)_attr < _NLA_END(_start, _len)) && \ ((char *)NLA_NEXT(_attr) <= _NLA_END(_start, _len)); \ _attr = NLA_NEXT(_attr)) -#define NL_ARRAY_LEN(_a) (sizeof(_a) / sizeof((_a)[0])) - struct linear_buffer { char *base; /* Base allocated memory pointer */ uint32_t offset; /* Currently used offset */ uint32_t size; /* Total buffer size */ struct linear_buffer *next; /* Buffer chaining */ } __aligned(alignof(__max_align_t)); static inline struct linear_buffer * lb_init(uint32_t size) { struct linear_buffer *lb = (struct linear_buffer *)calloc(1, size); if (lb != NULL) { lb->base = (char *)(lb + 1); lb->size = size - sizeof(*lb); } return (lb); } static inline void lb_free(struct linear_buffer *lb) { free(lb); } static inline char * lb_allocz(struct linear_buffer *lb, int len) { len = roundup2(len, alignof(__max_align_t)); if (lb->offset + len > lb->size) return (NULL); char *data = (lb->base + lb->offset); lb->offset += len; return (data); } static inline void lb_clear(struct linear_buffer *lb) { 
/*
 * Per-socket state of the simple netlink library.
 * Set up by snl_init() and torn down by snl_free().
 */
struct snl_state {
	int fd;			/* netlink socket opened by snl_init() */
	char *buf;		/* receive buffer, bufsize bytes, malloc()ed in snl_init() */
	size_t off;		/* offset in buf of the next message to return */
	size_t bufsize;		/* size of buf, taken from SO_RCVBUF */
	size_t datalen;		/* number of bytes stored in buf by the last recvmsg() */
	uint32_t seq;		/* last sequence number handed out by snl_get_seq() */
	bool init_done;		/* set once the socket exists; snl_free() is a no-op otherwise */
	struct linear_buffer *lb; /* head of the scratch buffer chain used by snl_allocz() */
};
nitems(_fp), \ .cb_post = _cb, \ } #define SNL_DECLARE_FIELD_PARSER(_name, _t, _fp) \ SNL_DECLARE_FIELD_PARSER_EXT(_name, sizeof(_t), 0, _fp, NULL) #define SNL_DECLARE_ATTR_PARSER_EXT(_name, _sz_out, _np, _cb) \ static const struct snl_hdr_parser _name = { \ .out_size = _sz_out, \ .np = &((_np)[0]), \ - .np_size = NL_ARRAY_LEN(_np), \ + .np_size = nitems(_np), \ .cb_post = _cb, \ } #define SNL_DECLARE_ATTR_PARSER(_name, _np) \ SNL_DECLARE_ATTR_PARSER_EXT(_name, 0, _np, NULL) static inline void * snl_allocz(struct snl_state *ss, int len) { void *data = lb_allocz(ss->lb, len); if (data == NULL) { uint32_t size = ss->lb->size * 2; while (size < len + sizeof(struct linear_buffer)) size *= 2; struct linear_buffer *lb = lb_init(size); if (lb != NULL) { lb->next = ss->lb; ss->lb = lb; data = lb_allocz(ss->lb, len); } } return (data); } static inline void snl_clear_lb(struct snl_state *ss) { struct linear_buffer *lb = ss->lb; lb_clear(lb); lb = lb->next; ss->lb->next = NULL; /* Remove all linear bufs except the largest one */ while (lb != NULL) { struct linear_buffer *lb_next = lb->next; lb_free(lb); lb = lb_next; } } static void snl_free(struct snl_state *ss) { if (ss->init_done) { close(ss->fd); if (ss->buf != NULL) free(ss->buf); if (ss->lb != NULL) { snl_clear_lb(ss); lb_free(ss->lb); } } } static inline bool snl_init(struct snl_state *ss, int netlink_family) { memset(ss, 0, sizeof(*ss)); ss->fd = socket(AF_NETLINK, SOCK_RAW, netlink_family); if (ss->fd == -1) return (false); ss->init_done = true; int val = 1; socklen_t optlen = sizeof(val); if (setsockopt(ss->fd, SOL_NETLINK, NETLINK_EXT_ACK, &val, optlen) == -1) { snl_free(ss); return (false); } int rcvbuf; if (getsockopt(ss->fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, &optlen) == -1) { snl_free(ss); return (false); } ss->bufsize = rcvbuf; ss->buf = (char *)malloc(ss->bufsize); if (ss->buf == NULL) { snl_free(ss); return (false); } ss->lb = lb_init(SCRATCH_BUFFER_SIZE); if (ss->lb == NULL) { snl_free(ss); return (false); } 
/*
 * Per-message metadata delivered by the kernel in a control message.
 *
 * process_id, port_id and seq_id are filled in by parse_cmsg() through
 * attribute parsers that store a full 32-bit value into each field, so
 * every one of them has to be 32 bits wide; narrower fields would be
 * overrun (seq_id even past the end of the structure).
 *
 * cmsg_type and cmsg_level are only zeroed by snl_read_message_dbg() in the
 * code shown here.
 */
struct snl_msg_info {
	int		cmsg_type;
	int		cmsg_level;
	uint32_t	process_id;
	uint32_t	port_id;
	uint32_t	seq_id;
};
inline struct nlmsghdr * snl_read_reply(struct snl_state *ss, uint32_t nlmsg_seq) { struct nlmsghdr *hdr; while ((hdr = snl_read_message(ss)) != NULL) { if (hdr->nlmsg_seq == nlmsg_seq) return (hdr); } return (NULL); } /* * Checks that attributes are sorted by attribute type. */ static inline void snl_verify_parsers(const struct snl_hdr_parser **parser, int count) { for (int i = 0; i < count; i++) { const struct snl_hdr_parser *p = parser[i]; int attr_type = 0; for (int j = 0; j < p->np_size; j++) { assert(p->np[j].type > attr_type); attr_type = p->np[j].type; } } } -#define SNL_VERIFY_PARSERS(_p) snl_verify_parsers((_p), NL_ARRAY_LEN(_p)) +#define SNL_VERIFY_PARSERS(_p) snl_verify_parsers((_p), nitems(_p)) static const struct snl_attr_parser * find_parser(const struct snl_attr_parser *ps, int pslen, int key) { int left_i = 0, right_i = pslen - 1; if (key < ps[0].type || key > ps[pslen - 1].type) return (NULL); while (left_i + 1 < right_i) { int mid_i = (left_i + right_i) / 2; if (key < ps[mid_i].type) right_i = mid_i; else if (key > ps[mid_i].type) left_i = mid_i + 1; else return (&ps[mid_i]); } if (ps[left_i].type == key) return (&ps[left_i]); else if (ps[right_i].type == key) return (&ps[right_i]); return (NULL); } static inline bool snl_parse_attrs_raw(struct snl_state *ss, struct nlattr *nla_head, int len, const struct snl_attr_parser *ps, int pslen, void *target) { struct nlattr *nla; NLA_FOREACH(nla, nla_head, len) { if (nla->nla_len < sizeof(struct nlattr)) return (false); int nla_type = nla->nla_type & NLA_TYPE_MASK; const struct snl_attr_parser *s = find_parser(ps, pslen, nla_type); if (s != NULL) { void *ptr = (void *)((char *)target + s->off); if (!s->cb(ss, nla, s->arg, ptr)) return (false); } } return (true); } static inline bool snl_parse_attrs(struct snl_state *ss, struct nlmsghdr *hdr, int hdrlen, const struct snl_attr_parser *ps, int pslen, void *target) { int off = NLMSG_HDRLEN + NETLINK_ALIGN(hdrlen); int len = hdr->nlmsg_len - off; struct 
nlattr *nla_head = (struct nlattr *)(void *)((char *)hdr + off); return (snl_parse_attrs_raw(ss, nla_head, len, ps, pslen, target)); } static inline void snl_parse_fields(struct snl_state *ss, struct nlmsghdr *hdr, int hdrlen __unused, const struct snl_field_parser *ps, int pslen, void *target) { for (int i = 0; i < pslen; i++) { const struct snl_field_parser *fp = &ps[i]; void *src = (char *)hdr + fp->off_in; void *dst = (char *)target + fp->off_out; fp->cb(ss, src, dst); } } static inline bool snl_parse_header(struct snl_state *ss, void *hdr, int len, const struct snl_hdr_parser *parser, void *target) { struct nlattr *nla_head; /* Extract fields first (if any) */ snl_parse_fields(ss, (struct nlmsghdr *)hdr, parser->in_hdr_size, parser->fp, parser->fp_size, target); nla_head = (struct nlattr *)(void *)((char *)hdr + parser->in_hdr_size); bool result = snl_parse_attrs_raw(ss, nla_head, len - parser->in_hdr_size, parser->np, parser->np_size, target); if (result && parser->cb_post != NULL) result = parser->cb_post(ss, target); return (result); } static inline bool snl_parse_nlmsg(struct snl_state *ss, struct nlmsghdr *hdr, const struct snl_hdr_parser *parser, void *target) { return (snl_parse_header(ss, hdr + 1, hdr->nlmsg_len - sizeof(*hdr), parser, target)); } static inline bool snl_attr_get_flag(struct snl_state *ss __unused, struct nlattr *nla, const void *arg __unused, void *target) { if (NLA_DATA_LEN(nla) == 0) { *((uint8_t *)target) = 1; return (true); } return (false); } static inline bool snl_attr_get_bytes(struct snl_state *ss __unused, struct nlattr *nla, const void *arg, void *target) { if ((size_t)NLA_DATA_LEN(nla) != (size_t)arg) return (false); memcpy(target, NLA_DATA_CONST(nla), (size_t)arg); return (true); } static inline bool snl_attr_get_bool(struct snl_state *ss __unused, struct nlattr *nla, const void *arg __unused, void *target) { if (NLA_DATA_LEN(nla) == sizeof(bool)) { *((bool *)target) = *((const bool *)NLA_DATA_CONST(nla)); return (true); } 
/*
 * Attribute callback: stores the 32-bit payload of @nla into the uint32_t
 * that @target points to.
 *
 * Returns false (leaving @target untouched) unless the payload is exactly
 * four bytes long.
 */
static inline bool
snl_attr_get_uint32(struct snl_state *ss __unused, struct nlattr *nla,
    const void *arg __unused, void *target)
{
	if (NLA_DATA_LEN(nla) != sizeof(uint32_t))
		return (false);

	const uint32_t *payload = (const uint32_t *)NLA_DATA_CONST(nla);
	uint32_t *out = (uint32_t *)target;

	*out = *payload;
	return (true);
}
maxlen) { *((char **)target) = (char *)NLA_DATA(nla); return (true); } return (false); } static inline bool snl_attr_get_stringn(struct snl_state *ss, struct nlattr *nla, const void *arg __unused, void *target) { int maxlen = NLA_DATA_LEN(nla); char *buf = (char *)snl_allocz(ss, maxlen + 1); if (buf == NULL) return (false); buf[maxlen] = '\0'; memcpy(buf, NLA_DATA(nla), maxlen); *((char **)target) = buf; return (true); } static inline bool snl_attr_copy_string(struct snl_state *ss, struct nlattr *nla, const void *arg, void *target) { char *tmp; if (snl_attr_get_string(ss, nla, NULL, &tmp)) { strlcpy((char *)target, tmp, (size_t)arg); return (true); } return (false); } static inline bool snl_attr_dup_string(struct snl_state *ss __unused, struct nlattr *nla, const void *arg __unused, void *target) { size_t maxlen = NLA_DATA_LEN(nla); if (strnlen((char *)NLA_DATA(nla), maxlen) < maxlen) { char *buf = (char *)snl_allocz(ss, maxlen); if (buf == NULL) return (false); memcpy(buf, NLA_DATA(nla), maxlen); *((char **)target) = buf; return (true); } return (false); } static inline bool snl_attr_get_nested(struct snl_state *ss, struct nlattr *nla, const void *arg, void *target) { const struct snl_hdr_parser *p = (const struct snl_hdr_parser *)arg; /* Assumes target points to the beginning of the structure */ return (snl_parse_header(ss, NLA_DATA(nla), NLA_DATA_LEN(nla), p, target)); } struct snl_parray { uint32_t count; void **items; }; static inline bool snl_attr_get_parray_sz(struct snl_state *ss, struct nlattr *container_nla, uint32_t start_size, const void *arg, void *target) { const struct snl_hdr_parser *p = (const struct snl_hdr_parser *)arg; struct snl_parray *array = (struct snl_parray *)target; struct nlattr *nla; uint32_t count = 0, size = start_size; if (p->out_size == 0) return (false); array->items = (void **)snl_allocz(ss, size * sizeof(void *)); if (array->items == NULL) return (false); /* * If the provided parser is an attribute parser, assume that each * nla 
in the container nla is the container nla itself and parse * the contents of this nla. * Otherwise, run the parser on raw data, assuming the header of this * data has u16 field with total size in the beginning. */ uint32_t data_off = 0; if (p->in_hdr_size == 0) data_off = sizeof(struct nlattr); NLA_FOREACH(nla, NLA_DATA(container_nla), NLA_DATA_LEN(container_nla)) { void *item = snl_allocz(ss, p->out_size); if (item == NULL) return (false); void *data = (char *)(void *)nla + data_off; int data_len = nla->nla_len - data_off; if (!(snl_parse_header(ss, data, data_len, p, item))) return (false); if (count == size) { uint32_t new_size = size * 2; void **new_array = (void **)snl_allocz(ss, new_size *sizeof(void *)); memcpy(new_array, array->items, size * sizeof(void *)); array->items = new_array; size = new_size; } array->items[count++] = item; } array->count = count; return (true); } /* * Parses and stores the unknown-size array. * Assumes each array item is a container and the NLAs in the container are parsable * by the parser provided in @arg. 
/*
 * Attribute callback: copies the whole attribute @nla (header included,
 * nla_len bytes) into scratch memory of @ss and stores the address of the
 * copy in the pointer that @target points to.
 *
 * Returns false if the scratch allocation fails.
 */
static inline bool
snl_attr_dup_nla(struct snl_state *ss, struct nlattr *nla,
    const void *arg __unused, void *target)
{
	void *copy = snl_allocz(ss, nla->nla_len);

	if (copy == NULL)
		return (false);

	memcpy(copy, nla, nla->nla_len);
	*((void **)target) = copy;

	return (true);
}
snl_attr_bits bits; }; #define _OUT(_field) offsetof(struct snl_attr_bitset, _field) static const struct snl_attr_parser _nla_p_bitset[] = { { .type = NLA_BITSET_SIZE, .off = _OUT(nla_bitset_size), .cb = snl_attr_get_uint32 }, { .type = NLA_BITSET_BITS, .off = _OUT(bits), .cb = snl_attr_get_parray, .arg = &_nla_bit_parser }, { .type = NLA_BITSET_VALUE, .off = _OUT(nla_bitset_mask), .cb = snl_attr_dup_nla }, { .type = NLA_BITSET_MASK, .off = _OUT(nla_bitset_value), .cb = snl_attr_dup_nla }, }; static inline bool _cb_p_bitset(struct snl_state *ss __unused, void *_target) { struct snl_attr_bitset *target = (struct snl_attr_bitset *)_target; uint32_t sz_bytes = _roundup2(target->nla_bitset_size, 32) / 8; if (target->nla_bitset_mask != NULL) { struct nlattr *nla = (struct nlattr *)target->nla_bitset_mask; uint32_t data_len = NLA_DATA_LEN(nla); if (data_len != sz_bytes || _roundup2(data_len, 4) != data_len) return (false); target->nla_bitset_mask = (uint32_t *)NLA_DATA(nla); } if (target->nla_bitset_value != NULL) { struct nlattr *nla = (struct nlattr *)target->nla_bitset_value; uint32_t data_len = NLA_DATA_LEN(nla); if (data_len != sz_bytes || _roundup2(data_len, 4) != data_len) return (false); target->nla_bitset_value = (uint32_t *)NLA_DATA(nla); } return (true); } #undef _OUT SNL_DECLARE_ATTR_PARSER_EXT(_nla_bitset_parser, sizeof(struct snl_attr_bitset), _nla_p_bitset, _cb_p_bitset); /* * Parses the compact bitset representation. 
/*
 * Attribute callback for the compact bitset representation.
 *
 * Parses the nested attributes of @nla with _nla_bitset_parser into the
 * struct snl_attr_bitset at @_target, which has to point to the beginning
 * of that structure.
 *
 * Succeeds only if parsing worked and both the mask and the value arrays
 * were present.
 */
static inline bool
snl_attr_get_bitset_c(struct snl_state *ss, struct nlattr *nla,
    const void *arg __unused, void *_target)
{
	struct snl_attr_bitset *bitset = (struct snl_attr_bitset *)_target;

	bool parsed = snl_parse_header(ss, NLA_DATA(nla), NLA_DATA_LEN(nla),
	    &_nla_bitset_parser, _target);

	return (parsed &&
	    bitset->nla_bitset_mask != NULL &&
	    bitset->nla_bitset_value != NULL);
}
_OUT(_field) offsetof(struct snl_errmsg_data, _field) static const struct snl_field_parser nlf_p_donemsg[] = { { .off_in = _IN(error), .off_out = _OUT(error), .cb = snl_field_get_uint32 }, }; #undef _IN #undef _OUT SNL_DECLARE_FIELD_PARSER(snl_donemsg_parser, struct nlmsgerr, nlf_p_donemsg); static inline bool snl_parse_errmsg(struct snl_state *ss, struct nlmsghdr *hdr, struct snl_errmsg_data *e) { if ((hdr->nlmsg_flags & NLM_F_CAPPED) != 0) return (snl_parse_nlmsg(ss, hdr, &snl_errmsg_parser, e)); const struct snl_hdr_parser *ps = &snl_errmsg_parser; struct nlmsgerr *errmsg = (struct nlmsgerr *)(hdr + 1); int hdrlen = sizeof(int) + NLMSG_ALIGN(errmsg->msg.nlmsg_len); struct nlattr *attr_head = (struct nlattr *)(void *)((char *)errmsg + hdrlen); int attr_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; snl_parse_fields(ss, (struct nlmsghdr *)errmsg, hdrlen, ps->fp, ps->fp_size, e); return (snl_parse_attrs_raw(ss, attr_head, attr_len, ps->np, ps->np_size, e)); } static inline bool snl_read_reply_code(struct snl_state *ss, uint32_t nlmsg_seq, struct snl_errmsg_data *e) { struct nlmsghdr *hdr = snl_read_reply(ss, nlmsg_seq); if (hdr == NULL) { e->error = EINVAL; } else if (hdr->nlmsg_type == NLMSG_ERROR) { if (!snl_parse_errmsg(ss, hdr, e)) e->error = EINVAL; return (e->error == 0); } return (false); } #define _OUT(_field) offsetof(struct snl_msg_info, _field) static const struct snl_attr_parser _nla_p_cinfo[] = { { .type = NLMSGINFO_ATTR_PROCESS_ID, .off = _OUT(process_id), .cb = snl_attr_get_uint32 }, { .type = NLMSGINFO_ATTR_PORT_ID, .off = _OUT(port_id), .cb = snl_attr_get_uint32 }, { .type = NLMSGINFO_ATTR_SEQ_ID, .off = _OUT(seq_id), .cb = snl_attr_get_uint32 }, }; #undef _OUT SNL_DECLARE_ATTR_PARSER(snl_msg_info_parser, _nla_p_cinfo); static inline bool parse_cmsg(struct snl_state *ss, const struct msghdr *msg, struct snl_msg_info *attrs) { for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { if 
(cmsg->cmsg_level != SOL_NETLINK || cmsg->cmsg_type != NETLINK_MSG_INFO) continue; void *data = CMSG_DATA(cmsg); int len = cmsg->cmsg_len - ((char *)data - (char *)cmsg); const struct snl_hdr_parser *ps = &snl_msg_info_parser; return (snl_parse_attrs_raw(ss, (struct nlattr *)data, len, ps->np, ps->np_size, attrs)); } return (false); } /* * Assumes e is zeroed */ static inline struct nlmsghdr * snl_read_reply_multi(struct snl_state *ss, uint32_t nlmsg_seq, struct snl_errmsg_data *e) { struct nlmsghdr *hdr = snl_read_reply(ss, nlmsg_seq); if (hdr == NULL) { e->error = EINVAL; } else if (hdr->nlmsg_type == NLMSG_ERROR) { if (!snl_parse_errmsg(ss, hdr, e)) e->error = EINVAL; } else if (hdr->nlmsg_type == NLMSG_DONE) { snl_parse_nlmsg(ss, hdr, &snl_donemsg_parser, e); } else return (hdr); return (NULL); } /* writer logic */ struct snl_writer { char *base; uint32_t offset; uint32_t size; struct nlmsghdr *hdr; struct snl_state *ss; bool error; }; static inline void snl_init_writer(struct snl_state *ss, struct snl_writer *nw) { nw->size = SNL_WRITER_BUFFER_SIZE; nw->base = (char *)snl_allocz(ss, nw->size); if (nw->base == NULL) { nw->error = true; nw->size = 0; } nw->offset = 0; nw->hdr = NULL; nw->error = false; nw->ss = ss; } static inline bool snl_realloc_msg_buffer(struct snl_writer *nw, size_t sz) { uint32_t new_size = nw->size * 2; while (new_size < nw->size + sz) new_size *= 2; if (nw->error) return (false); if (snl_allocz(nw->ss, new_size) == NULL) { nw->error = true; return (false); } nw->size = new_size; void *new_base = nw->ss->lb->base; if (new_base != nw->base) { memcpy(new_base, nw->base, nw->offset); if (nw->hdr != NULL) { int hdr_off = (char *)(nw->hdr) - nw->base; nw->hdr = (struct nlmsghdr *) (void *)((char *)new_base + hdr_off); } nw->base = (char *)new_base; } return (true); } static inline void * snl_reserve_msg_data_raw(struct snl_writer *nw, size_t sz) { sz = NETLINK_ALIGN(sz); if (__predict_false(nw->offset + sz > nw->size)) { if 
(!snl_realloc_msg_buffer(nw, sz)) return (NULL); } void *data_ptr = &nw->base[nw->offset]; nw->offset += sz; return (data_ptr); } #define snl_reserve_msg_object(_ns, _t) ((_t *)snl_reserve_msg_data_raw(_ns, sizeof(_t))) #define snl_reserve_msg_data(_ns, _sz, _t) ((_t *)snl_reserve_msg_data_raw(_ns, _sz)) static inline void * _snl_reserve_msg_attr(struct snl_writer *nw, uint16_t nla_type, uint16_t sz) { sz += sizeof(struct nlattr); struct nlattr *nla = snl_reserve_msg_data(nw, sz, struct nlattr); if (__predict_false(nla == NULL)) return (NULL); nla->nla_type = nla_type; nla->nla_len = sz; return ((void *)(nla + 1)); } #define snl_reserve_msg_attr(_ns, _at, _t) ((_t *)_snl_reserve_msg_attr(_ns, _at, sizeof(_t))) static inline bool snl_add_msg_attr(struct snl_writer *nw, int attr_type, int attr_len, const void *data) { int required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr)); if (__predict_false(nw->offset + required_len > nw->size)) { if (!snl_realloc_msg_buffer(nw, required_len)) return (false); } struct nlattr *nla = (struct nlattr *)(void *)(&nw->base[nw->offset]); nla->nla_len = attr_len + sizeof(struct nlattr); nla->nla_type = attr_type; if (attr_len > 0) { if ((attr_len % 4) != 0) { /* clear padding bytes */ bzero((char *)nla + required_len - 4, 4); } memcpy((nla + 1), data, attr_len); } nw->offset += required_len; return (true); } static inline bool snl_add_msg_attr_raw(struct snl_writer *nw, const struct nlattr *nla_src) { int attr_len = nla_src->nla_len - sizeof(struct nlattr); assert(attr_len >= 0); return (snl_add_msg_attr(nw, nla_src->nla_type, attr_len, (const void *)(nla_src + 1))); } static inline bool snl_add_msg_attr_bool(struct snl_writer *nw, int attrtype, bool value) { return (snl_add_msg_attr(nw, attrtype, sizeof(bool), &value)); } static inline bool snl_add_msg_attr_u8(struct snl_writer *nw, int attrtype, uint8_t value) { return (snl_add_msg_attr(nw, attrtype, sizeof(uint8_t), &value)); } static inline bool snl_add_msg_attr_u16(struct 
/*
 * Appends the NUL-terminated string @str as an attribute of type
 * @attrtype; the terminating NUL is part of the payload.
 *
 * Returns the result of snl_add_msg_attr().
 */
static inline bool
snl_add_msg_attr_string(struct snl_writer *nw, int attrtype, const char *str)
{
	size_t len_with_nul = strlen(str) + 1;

	return (snl_add_msg_attr(nw, attrtype, len_with_nul, str));
}
return (off); } static inline void snl_end_attr_nested(const struct snl_writer *nw, int off) { if (!nw->error) { struct nlattr *nla = snl_restore_msg_offset(nw, off, struct nlattr); nla->nla_len = NETLINK_ALIGN(snl_get_msg_offset(nw) - off); } } static inline struct nlmsghdr * snl_create_msg_request(struct snl_writer *nw, int nlmsg_type) { assert(nw->hdr == NULL); struct nlmsghdr *hdr = snl_reserve_msg_object(nw, struct nlmsghdr); hdr->nlmsg_type = nlmsg_type; hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; nw->hdr = hdr; return (hdr); } static void snl_abort_msg(struct snl_writer *nw) { if (nw->hdr != NULL) { int offset = (char *)(&nw->base[nw->offset]) - (char *)(nw->hdr); nw->offset -= offset; nw->hdr = NULL; } } static inline struct nlmsghdr * snl_finalize_msg(struct snl_writer *nw) { if (nw->error) snl_abort_msg(nw); if (nw->hdr != NULL) { struct nlmsghdr *hdr = nw->hdr; int offset = (char *)(&nw->base[nw->offset]) - (char *)(nw->hdr); hdr->nlmsg_len = offset; hdr->nlmsg_seq = snl_get_seq(nw->ss); nw->hdr = NULL; return (hdr); } return (NULL); } static inline bool snl_send_msgs(struct snl_writer *nw) { int offset = nw->offset; assert(nw->hdr == NULL); nw->offset = 0; return (snl_send(nw->ss, nw->base, offset)); } static const struct snl_hdr_parser *snl_all_core_parsers[] = { &snl_errmsg_parser, &snl_donemsg_parser, &_nla_bit_parser, &_nla_bitset_parser, }; #endif diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c index c19161f5bbb7..86b1f8f1b1bc 100644 --- a/sys/netlink/route/iface.c +++ b/sys/netlink/route/iface.c @@ -1,1535 +1,1535 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
/*
 * Bundle of arguments and running state: a message writer, a netlink
 * header stored by value, socket and credential pointers, selectors
 * (fibnum, family) and result counters.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; presumably it is handed to the callbacks that produce dump
 * replies -- confirm against the callers before documenting the fields
 * in more detail.
 */
struct netlink_walkargs {
	struct nl_writer *nw;
	struct nlmsghdr hdr;
	struct nlpcb *so;
	struct ucred *cred;
	uint32_t fibnum;
	int family;
	int error;
	int count;
	int dumped;
};
*/ extern int (*carp_get_vhid_p)(struct ifaddr *); /* * RTM_GETLINK request * sendto(3, {{len=32, type=RTM_GETLINK, flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0}, * {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32 * * Reply: * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0}, {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"} [ {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"}, {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000}, {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6}, {{nla_len=5, nla_type=IFLA_LINKMODE}, 0}, {{nla_len=8, nla_type=IFLA_MTU}, 1500}, {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68}, {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000}, {{nla_len=8, nla_type=IFLA_GROUP}, 0}, {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0}, {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1}, {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535}, {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536}, {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1}, {{nla_len=5, nla_type=IFLA_CARRIER}, 1}, {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"}, {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2}, {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0}, {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1}, {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1}, */ struct if_state { uint8_t ifla_operstate; uint8_t ifla_carrier; }; static void get_operstate_ether(if_t ifp, struct if_state *pstate) { struct ifmediareq ifmr = {}; int error; error = if_ioctl(ifp, SIOCGIFMEDIA, (void *)&ifmr); if (error != 0) { NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d", if_name(ifp), error); return; } switch (IFM_TYPE(ifmr.ifm_active)) { case IFM_ETHER: if (ifmr.ifm_status & IFM_ACTIVE) { pstate->ifla_carrier = 1; if (if_getflags(ifp) & IFF_MONITOR) pstate->ifla_operstate = IF_OPER_DORMANT; else pstate->ifla_operstate = IF_OPER_UP; } else 
pstate->ifla_operstate = IF_OPER_DOWN; } } static bool get_stats(struct nl_writer *nw, if_t ifp) { struct rtnl_link_stats64 *stats; int nla_len = sizeof(struct nlattr) + sizeof(*stats); struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); if (nla == NULL) return (false); nla->nla_type = IFLA_STATS64; nla->nla_len = nla_len; stats = (struct rtnl_link_stats64 *)(nla + 1); stats->rx_packets = if_getcounter(ifp, IFCOUNTER_IPACKETS); stats->tx_packets = if_getcounter(ifp, IFCOUNTER_OPACKETS); stats->rx_bytes = if_getcounter(ifp, IFCOUNTER_IBYTES); stats->tx_bytes = if_getcounter(ifp, IFCOUNTER_OBYTES); stats->rx_errors = if_getcounter(ifp, IFCOUNTER_IERRORS); stats->tx_errors = if_getcounter(ifp, IFCOUNTER_OERRORS); stats->rx_dropped = if_getcounter(ifp, IFCOUNTER_IQDROPS); stats->tx_dropped = if_getcounter(ifp, IFCOUNTER_OQDROPS); stats->multicast = if_getcounter(ifp, IFCOUNTER_IMCASTS); stats->rx_nohandler = if_getcounter(ifp, IFCOUNTER_NOPROTO); return (true); } static void get_operstate(if_t ifp, struct if_state *pstate) { pstate->ifla_operstate = IF_OPER_UNKNOWN; pstate->ifla_carrier = 0; /* no carrier */ switch (if_gettype(ifp)) { case IFT_ETHER: case IFT_L2VLAN: get_operstate_ether(ifp, pstate); break; default: /* Map admin state to the operstate */ if (if_getflags(ifp) & IFF_UP) { pstate->ifla_operstate = IF_OPER_UP; pstate->ifla_carrier = 1; } else pstate->ifla_operstate = IF_OPER_DOWN; break; } } static void get_hwaddr(struct nl_writer *nw, if_t ifp) { struct ifreq ifr = {}; if (if_gethwaddr(ifp, &ifr) == 0) { nlattr_add(nw, IFLAF_ORIG_HWADDR, if_getaddrlen(ifp), ifr.ifr_addr.sa_data); } } static unsigned ifp_flags_to_netlink(const if_t ifp) { return (if_getflags(ifp) | if_getdrvflags(ifp)); } #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen)) static bool dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa) { uint32_t addr_len = 0; const void *addr_data = NULL; #ifdef INET6 struct in6_addr addr6; #endif if (sa 
== NULL) return (true); switch (sa->sa_family) { #ifdef INET case AF_INET: addr_len = sizeof(struct in_addr); addr_data = &((const struct sockaddr_in *)sa)->sin_addr; break; #endif #ifdef INET6 case AF_INET6: in6_splitscope(&((const struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len); addr_len = sizeof(struct in6_addr); addr_data = &addr6; break; #endif case AF_LINK: addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen; addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa); break; case AF_UNSPEC: /* Ignore empty SAs without warning */ return (true); default: NL_LOG(LOG_DEBUG2, "unsupported family: %d, skipping", sa->sa_family); return (true); } return (nlattr_add(nw, attr, addr_len, addr_data)); } static bool dump_iface_caps(struct nl_writer *nw, struct ifnet *ifp) { int off = nlattr_add_nested(nw, IFLAF_CAPS); uint32_t active_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {}; uint32_t all_caps[roundup2(IFCAP_B_SIZE, 32) / 32] = {}; MPASS(sizeof(active_caps) >= 8); MPASS(sizeof(all_caps) >= 8); if (off == 0) return (false); active_caps[0] = (uint32_t)if_getcapabilities(ifp); all_caps[0] = (uint32_t)if_getcapenable(ifp); active_caps[1] = (uint32_t)if_getcapabilities2(ifp); all_caps[1] = (uint32_t)if_getcapenable2(ifp); nlattr_add_u32(nw, NLA_BITSET_SIZE, IFCAP_B_SIZE); nlattr_add(nw, NLA_BITSET_MASK, sizeof(all_caps), all_caps); nlattr_add(nw, NLA_BITSET_VALUE, sizeof(active_caps), active_caps); nlattr_set_len(nw, off); return (true); } /* * Dumps interface state, properties and metrics. * @nw: message writer * @ifp: target interface * @hdr: template header * @if_flags_mask: changed if_[drv]_flags bitmask * * This function is called without epoch and MAY sleep. 
*/
/*
 * Returns true once the message has been completed with nlmsg_end();
 * on any failure the partially written message is aborted and false is
 * returned.
 */
static bool
dump_iface(struct nl_writer *nw, if_t ifp, const struct nlmsghdr *hdr,
    int if_flags_mask)
{
	struct epoch_tracker et;
	struct ifinfomsg *ifinfo;
	struct if_state oper = {};
	int nest_off;

	NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp));

	if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg)))
		goto enomem;

	/* Fixed-size ifinfomsg header comes first. */
	ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg);
	ifinfo->ifi_family = AF_UNSPEC;
	ifinfo->__ifi_pad = 0;
	ifinfo->ifi_type = if_gettype(ifp);
	ifinfo->ifi_index = if_getindex(ifp);
	ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
	ifinfo->ifi_change = if_flags_mask;

	get_operstate(ifp, &oper);
	if (oper.ifla_operstate == IF_OPER_UP)
		ifinfo->ifi_flags |= IFF_LOWER_UP;

	nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
	nlattr_add_u8(nw, IFLA_OPERSTATE, oper.ifla_operstate);
	nlattr_add_u8(nw, IFLA_CARRIER, oper.ifla_carrier);

/*
	nlattr_add_u8(nw, IFLA_PROTO_DOWN, val);
	nlattr_add_u8(nw, IFLA_LINKMODE, val);
*/
	if (if_getaddrlen(ifp) != 0) {
		struct ifa_iter it;
		struct ifaddr *first;

		/* Only the first address of the interface is exported. */
		NET_EPOCH_ENTER(et);
		first = ifa_iter_start(ifp, &it);
		if (first != NULL)
			dump_sa(nw, IFLA_ADDRESS, first->ifa_addr);
		ifa_iter_finish(&it);
		NET_EPOCH_EXIT(et);
	}

	if (if_getbroadcastaddr(ifp) != NULL) {
		nlattr_add(nw, IFLA_BROADCAST, if_getaddrlen(ifp),
		    if_getbroadcastaddr(ifp));
	}

	nlattr_add_u32(nw, IFLA_MTU, if_getmtu(ifp));
/*
	nlattr_add_u32(nw, IFLA_MIN_MTU, 60);
	nlattr_add_u32(nw, IFLA_MAX_MTU, 9000);
	nlattr_add_u32(nw, IFLA_GROUP, 0);
*/

	if (if_getdescr(ifp) != NULL)
		nlattr_add_string(nw, IFLA_IFALIAS, if_getdescr(ifp));

	/* Store FreeBSD-specific attributes */
	nest_off = nlattr_add_nested(nw, IFLA_FREEBSD);
	if (nest_off != 0) {
		get_hwaddr(nw, ifp);
		dump_iface_caps(nw, ifp);

		nlattr_set_len(nw, nest_off);
	}

	get_stats(nw, ifp);

	/* Promiscuity is reported as 0 or 1. */
	nlattr_add_u32(nw, IFLA_PROMISCUITY,
	    (uint32_t)((if_getflags(ifp) & IFF_PROMISC) != 0));

	ifc_dump_ifp_nl(ifp, nw);

	if (!nlmsg_end(nw))
		goto enomem;
	return (true);

enomem:
	NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp));
	nlmsg_abort(nw);
	return (false);
}
static bool check_ifmsg(void *hdr, struct nl_pstate *npt) { struct ifinfomsg *ifm = hdr; if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 || ifm->ifi_flags != 0 || ifm->ifi_change != 0) { nlmsg_report_err_msg(npt, "strict checking: non-zero values in ifinfomsg header"); return (false); } return (true); } #define _IN(_field) offsetof(struct ifinfomsg, _field) #define _OUT(_field) offsetof(struct nl_parsed_link, _field) static const struct nlfield_parser nlf_p_if[] = { { .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 }, { .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 }, { .off_in = _IN(ifi_flags), .off_out = _OUT(ifi_flags), .cb = nlf_get_u32 }, { .off_in = _IN(ifi_change), .off_out = _OUT(ifi_change), .cb = nlf_get_u32 }, }; static const struct nlattr_parser nla_p_linfo[] = { { .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn }, { .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla }, }; NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo); static const struct nlattr_parser nla_p_if[] = { { .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string }, { .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 }, { .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 }, { .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = nlattr_get_nested }, { .type = IFLA_IFALIAS, .off = _OUT(ifla_ifalias), .cb = nlattr_get_string }, { .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string }, { .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string }, }; #undef _IN #undef _OUT NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if); static bool match_iface(if_t ifp, void *_arg) { struct nl_parsed_link *attrs = (struct nl_parsed_link *)_arg; if (attrs->ifi_index != 0 && attrs->ifi_index != if_getindex(ifp)) return (false); if (attrs->ifi_type != 0 && attrs->ifi_index != if_gettype(ifp)) return 
(false); if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp))) return (false); /* TODO: add group match */ return (true); } static int dump_cb(if_t ifp, void *_arg) { struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg; if (!dump_iface(wa->nw, ifp, &wa->hdr, 0)) return (ENOMEM); return (0); } /* * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0}, * {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, * [ * [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"], * [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF] * ] */ static int rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct epoch_tracker et; if_t ifp; int error = 0; struct nl_parsed_link attrs = {}; error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); if (error != 0) return (error); struct netlink_walkargs wa = { .so = nlp, .nw = npt->nw, .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags, .hdr.nlmsg_type = NL_RTM_NEWLINK, }; /* Fast track for an interface w/ explicit name or index match */ if ((attrs.ifi_index != 0) || (attrs.ifla_ifname != NULL)) { if (attrs.ifi_index != 0) { NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u", attrs.ifi_index); NET_EPOCH_ENTER(et); ifp = ifnet_byindex_ref(attrs.ifi_index); NET_EPOCH_EXIT(et); } else { NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching name %s", attrs.ifla_ifname); ifp = ifunit_ref(attrs.ifla_ifname); } if (ifp != NULL) { if (match_iface(ifp, &attrs)) { if (!dump_iface(wa.nw, ifp, &wa.hdr, 0)) error = ENOMEM; } else error = ENODEV; if_rele(ifp); } else error = ENODEV; return (error); } /* Always treat non-direct-match as a multipart message */ wa.hdr.nlmsg_flags |= NLM_F_MULTI; /* * Fetching some link properties require performing ioctl's that may be blocking. 
* Address it by saving referenced pointers of the matching links, * exiting from epoch and going through the list one-by-one. */ NL_LOG(LOG_DEBUG2, "Start dump"); if_foreach_sleep(match_iface, &attrs, dump_cb, &wa); NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped); if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (error); } /* * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[ * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0}, * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, * {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"], * [ * {nla_len=16, nla_type=IFLA_LINKINFO}, * [ * {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"... * ] * ] */ static int rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct epoch_tracker et; if_t ifp; int error; struct nl_parsed_link attrs = {}; error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); if (error != 0) return (error); NET_EPOCH_ENTER(et); ifp = ifnet_byindex_ref(attrs.ifi_index); NET_EPOCH_EXIT(et); if (ifp == NULL) { NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index); return (ENOENT); } NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp)); sx_xlock(&ifnet_detach_sxlock); error = if_clone_destroy(if_name(ifp)); sx_xunlock(&ifnet_detach_sxlock); NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error); if_rele(ifp); return (error); } /* * New link: * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1668185590, pid=0}, * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0} * [ * {{nla_len=8, nla_type=IFLA_MTU}, 123}, * {{nla_len=10, nla_type=IFLA_IFNAME}, "vlan1"}, * 
{{nla_len=24, nla_type=IFLA_LINKINFO}, * [ * {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...}, * {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x7b\x00\x00\x00"}]}]} * * Update link: * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1668185923, pid=0}, * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=if_nametoindex("lo"), ifi_flags=0, ifi_change=0}, * {{nla_len=8, nla_type=IFLA_MTU}, 123}} * * * Check command availability: * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0}, * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0} */ static int create_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs, struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt) { if (lattrs->ifla_ifname == NULL || strlen(lattrs->ifla_ifname) == 0) { NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute"); return (EINVAL); } if (lattrs->ifla_cloner == NULL || strlen(lattrs->ifla_cloner) == 0) { NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute"); return (EINVAL); } struct ifc_data_nl ifd = { .flags = IFC_F_CREATE, .lattrs = lattrs, .bm = bm, .npt = npt, }; if (ifc_create_ifp_nl(lattrs->ifla_ifname, &ifd) && ifd.error == 0) nl_store_ifp_cookie(npt, ifd.ifp); return (ifd.error); } static int modify_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs, struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt) { if_t ifp = NULL; struct epoch_tracker et; if (lattrs->ifi_index == 0 && lattrs->ifla_ifname == NULL) { /* * Applications like ip(8) verify RTM_NEWLINK command * existence by calling it with empty arguments. Always * return "innocent" error in that case. 
*/ NLMSG_REPORT_ERR_MSG(npt, "empty ifi_index field"); return (EPERM); } if (lattrs->ifi_index != 0) { NET_EPOCH_ENTER(et); ifp = ifnet_byindex_ref(lattrs->ifi_index); NET_EPOCH_EXIT(et); if (ifp == NULL) { NLMSG_REPORT_ERR_MSG(npt, "unable to find interface #%u", lattrs->ifi_index); return (ENOENT); } } if (ifp == NULL && lattrs->ifla_ifname != NULL) { ifp = ifunit_ref(lattrs->ifla_ifname); if (ifp == NULL) { NLMSG_REPORT_ERR_MSG(npt, "unable to find interface %s", lattrs->ifla_ifname); return (ENOENT); } } MPASS(ifp != NULL); /* * Modification request can address either * 1) cloned interface, in which case we call the cloner-specific * modification routine * or * 2) non-cloned (e.g. "physical") interface, in which case we call * generic modification routine */ struct ifc_data_nl ifd = { .lattrs = lattrs, .bm = bm, .npt = npt }; if (!ifc_modify_ifp_nl(ifp, &ifd)) ifd.error = nl_modify_ifp_generic(ifp, lattrs, bm, npt); if_rele(ifp); return (ifd.error); } static int rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct nlattr_bmask bm; int error; struct nl_parsed_link attrs = {}; error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); if (error != 0) return (error); nl_get_attrs_bmask_nlmsg(hdr, &ifmsg_parser, &bm); if (hdr->nlmsg_flags & NLM_F_CREATE) return (create_link(hdr, &attrs, &bm, nlp, npt)); else return (modify_link(hdr, &attrs, &bm, nlp, npt)); } static void set_scope6(struct sockaddr *sa, uint32_t ifindex) { #ifdef INET6 if (sa != NULL && sa->sa_family == AF_INET6) { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) in6_set_unicast_scopeid(&sa6->sin6_addr, ifindex); } #endif } static bool check_sa_family(const struct sockaddr *sa, int family, const char *attr_name, struct nl_pstate *npt) { if (sa == NULL || sa->sa_family == family) return (true); nlmsg_report_err_msg(npt, "wrong family for %s attribute: %d != %d", attr_name, family, sa->sa_family); return (false); } 
struct nl_parsed_ifa { uint8_t ifa_family; uint8_t ifa_prefixlen; uint8_t ifa_scope; uint32_t ifa_index; uint32_t ifa_flags; uint32_t ifaf_vhid; uint32_t ifaf_flags; struct sockaddr *ifa_address; struct sockaddr *ifa_local; struct sockaddr *ifa_broadcast; struct ifa_cacheinfo *ifa_cacheinfo; struct sockaddr *f_ifa_addr; struct sockaddr *f_ifa_dst; }; static int nlattr_get_cinfo(struct nlattr *nla, struct nl_pstate *npt, const void *arg __unused, void *target) { if (__predict_false(NLA_DATA_LEN(nla) != sizeof(struct ifa_cacheinfo))) { NLMSG_REPORT_ERR_MSG(npt, "nla type %d size(%u) is not ifa_cacheinfo", nla->nla_type, NLA_DATA_LEN(nla)); return (EINVAL); } *((struct ifa_cacheinfo **)target) = (struct ifa_cacheinfo *)NL_RTA_DATA(nla); return (0); } #define _IN(_field) offsetof(struct ifaddrmsg, _field) #define _OUT(_field) offsetof(struct nl_parsed_ifa, _field) static const struct nlfield_parser nlf_p_ifa[] = { { .off_in = _IN(ifa_family), .off_out = _OUT(ifa_family), .cb = nlf_get_u8 }, { .off_in = _IN(ifa_prefixlen), .off_out = _OUT(ifa_prefixlen), .cb = nlf_get_u8 }, { .off_in = _IN(ifa_scope), .off_out = _OUT(ifa_scope), .cb = nlf_get_u8 }, { .off_in = _IN(ifa_flags), .off_out = _OUT(ifa_flags), .cb = nlf_get_u8_u32 }, { .off_in = _IN(ifa_index), .off_out = _OUT(ifa_index), .cb = nlf_get_u32 }, }; static const struct nlattr_parser nla_p_ifa_fbsd[] = { { .type = IFAF_VHID, .off = _OUT(ifaf_vhid), .cb = nlattr_get_uint32 }, { .type = IFAF_FLAGS, .off = _OUT(ifaf_flags), .cb = nlattr_get_uint32 }, }; NL_DECLARE_ATTR_PARSER(ifa_fbsd_parser, nla_p_ifa_fbsd); static const struct nlattr_parser nla_p_ifa[] = { { .type = IFA_ADDRESS, .off = _OUT(ifa_address), .cb = nlattr_get_ip }, { .type = IFA_LOCAL, .off = _OUT(ifa_local), .cb = nlattr_get_ip }, { .type = IFA_BROADCAST, .off = _OUT(ifa_broadcast), .cb = nlattr_get_ip }, { .type = IFA_CACHEINFO, .off = _OUT(ifa_cacheinfo), .cb = nlattr_get_cinfo }, { .type = IFA_FLAGS, .off = _OUT(ifa_flags), .cb = nlattr_get_uint32 }, 
{ .type = IFA_FREEBSD, .arg = &ifa_fbsd_parser, .cb = nlattr_get_nested }, }; #undef _IN #undef _OUT static bool post_p_ifa(void *_attrs, struct nl_pstate *npt) { struct nl_parsed_ifa *attrs = (struct nl_parsed_ifa *)_attrs; if (!check_sa_family(attrs->ifa_address, attrs->ifa_family, "IFA_ADDRESS", npt)) return (false); if (!check_sa_family(attrs->ifa_local, attrs->ifa_family, "IFA_LOCAL", npt)) return (false); if (!check_sa_family(attrs->ifa_broadcast, attrs->ifa_family, "IFA_BROADADDR", npt)) return (false); set_scope6(attrs->ifa_address, attrs->ifa_index); set_scope6(attrs->ifa_local, attrs->ifa_index); return (true); } NL_DECLARE_PARSER_EXT(ifa_parser, struct ifaddrmsg, NULL, nlf_p_ifa, nla_p_ifa, post_p_ifa); /* {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")}, [ {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")}, {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")}, {{nla_len=7, nla_type=IFA_LABEL}, "lo"}, {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}, {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]}, --- {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735}, {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")}, [ {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")}, {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}}, {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]}, */ static uint8_t ifa_get_scope(const struct ifaddr *ifa) { const struct sockaddr *sa; uint8_t addr_scope = RT_SCOPE_UNIVERSE; sa = ifa->ifa_addr; switch (sa->sa_family) { #ifdef INET case AF_INET: { struct in_addr addr; addr = ((const struct sockaddr_in *)sa)->sin_addr; if (IN_LOOPBACK(addr.s_addr)) addr_scope = RT_SCOPE_HOST; else if 
(IN_LINKLOCAL(addr.s_addr)) addr_scope = RT_SCOPE_LINK; break; } #endif #ifdef INET6 case AF_INET6: { const struct in6_addr *addr; addr = &((const struct sockaddr_in6 *)sa)->sin6_addr; if (IN6_IS_ADDR_LOOPBACK(addr)) addr_scope = RT_SCOPE_HOST; else if (IN6_IS_ADDR_LINKLOCAL(addr)) addr_scope = RT_SCOPE_LINK; break; } #endif } return (addr_scope); } #ifdef INET6 static uint8_t inet6_get_plen(const struct in6_addr *addr) { return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) + bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3])); } #endif static uint8_t get_sa_plen(const struct sockaddr *sa) { #ifdef INET const struct in_addr *paddr; #endif #ifdef INET6 const struct in6_addr *paddr6; #endif switch (sa->sa_family) { #ifdef INET case AF_INET: paddr = &(((const struct sockaddr_in *)sa)->sin_addr); return bitcount32(paddr->s_addr); #endif #ifdef INET6 case AF_INET6: paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr); return inet6_get_plen(paddr6); #endif } return (0); } #ifdef INET6 static uint32_t in6_flags_to_nl(uint32_t flags) { uint32_t nl_flags = 0; if (flags & IN6_IFF_TEMPORARY) nl_flags |= IFA_F_TEMPORARY; if (flags & IN6_IFF_NODAD) nl_flags |= IFA_F_NODAD; if (flags & IN6_IFF_DEPRECATED) nl_flags |= IFA_F_DEPRECATED; if (flags & IN6_IFF_TENTATIVE) nl_flags |= IFA_F_TENTATIVE; if ((flags & (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == 0) flags |= IFA_F_PERMANENT; if (flags & IN6_IFF_DUPLICATED) flags |= IFA_F_DADFAILED; return (nl_flags); } static uint32_t nl_flags_to_in6(uint32_t flags) { uint32_t in6_flags = 0; if (flags & IFA_F_TEMPORARY) in6_flags |= IN6_IFF_TEMPORARY; if (flags & IFA_F_NODAD) in6_flags |= IN6_IFF_NODAD; if (flags & IFA_F_DEPRECATED) in6_flags |= IN6_IFF_DEPRECATED; if (flags & IFA_F_TENTATIVE) in6_flags |= IN6_IFF_TENTATIVE; if (flags & IFA_F_DADFAILED) in6_flags |= IN6_IFF_DUPLICATED; return (in6_flags); } static void export_cache_info6(struct nl_writer *nw, const struct in6_ifaddr *ia) { struct ifa_cacheinfo 
ci = { .cstamp = ia->ia6_createtime * 1000, .tstamp = ia->ia6_updatetime * 1000, .ifa_prefered = ia->ia6_lifetime.ia6t_pltime, .ifa_valid = ia->ia6_lifetime.ia6t_vltime, }; nlattr_add(nw, IFA_CACHEINFO, sizeof(ci), &ci); } #endif static void export_cache_info(struct nl_writer *nw, struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET6 case AF_INET6: export_cache_info6(nw, (struct in6_ifaddr *)ifa); break; #endif } } /* * {'attrs': [('IFA_ADDRESS', '12.0.0.1'), ('IFA_LOCAL', '12.0.0.1'), ('IFA_LABEL', 'eth10'), ('IFA_FLAGS', 128), ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})], */ static bool dump_iface_addr(struct nl_writer *nw, if_t ifp, struct ifaddr *ifa, const struct nlmsghdr *hdr) { struct ifaddrmsg *ifamsg; struct sockaddr *sa = ifa->ifa_addr; struct sockaddr *sa_dst = ifa->ifa_dstaddr; NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s", ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp)); if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg))) goto enomem; ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg); ifamsg->ifa_family = sa->sa_family; ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask); ifamsg->ifa_flags = 0; // ifa_flags is useless ifamsg->ifa_scope = ifa_get_scope(ifa); ifamsg->ifa_index = if_getindex(ifp); if ((if_getflags(ifp) & IFF_POINTOPOINT) && sa_dst != NULL && sa_dst->sa_family != 0) { /* P2P interface may have IPv6 LL with no dst address */ dump_sa(nw, IFA_ADDRESS, sa_dst); dump_sa(nw, IFA_LOCAL, sa); } else { dump_sa(nw, IFA_ADDRESS, sa); #ifdef INET /* * In most cases, IFA_ADDRESS == IFA_LOCAL * Skip IFA_LOCAL for anything except INET */ if (sa->sa_family == AF_INET) dump_sa(nw, IFA_LOCAL, sa); #endif } if (if_getflags(ifp) & IFF_BROADCAST) dump_sa(nw, IFA_BROADCAST, ifa->ifa_broadaddr); nlattr_add_string(nw, IFA_LABEL, if_name(ifp)); uint32_t nl_ifa_flags = 0; #ifdef INET6 if (sa->sa_family == AF_INET6) { struct in6_ifaddr 
*ia = (struct in6_ifaddr *)ifa; nl_ifa_flags = in6_flags_to_nl(ia->ia6_flags); } #endif nlattr_add_u32(nw, IFA_FLAGS, nl_ifa_flags); export_cache_info(nw, ifa); /* Store FreeBSD-specific attributes */ int off = nlattr_add_nested(nw, IFA_FREEBSD); if (off != 0) { if (ifa->ifa_carp != NULL && carp_get_vhid_p != NULL) { uint32_t vhid = (uint32_t)(*carp_get_vhid_p)(ifa); nlattr_add_u32(nw, IFAF_VHID, vhid); } #ifdef INET6 if (sa->sa_family == AF_INET6) { uint32_t ifa_flags = ((struct in6_ifaddr *)ifa)->ia6_flags; nlattr_add_u32(nw, IFAF_FLAGS, ifa_flags); } #endif nlattr_set_len(nw, off); } if (nlmsg_end(nw)) return (true); enomem: NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s", rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp)); nlmsg_abort(nw); return (false); } static int dump_iface_addrs(struct netlink_walkargs *wa, if_t ifp) { struct ifaddr *ifa; struct ifa_iter it; int error = 0; for (ifa = ifa_iter_start(ifp, &it); ifa != NULL; ifa = ifa_iter_next(&it)) { if (wa->family != 0 && wa->family != ifa->ifa_addr->sa_family) continue; if (ifa->ifa_addr->sa_family == AF_LINK) continue; if (prison_if(wa->cred, ifa->ifa_addr) != 0) continue; wa->count++; if (!dump_iface_addr(wa->nw, ifp, ifa, &wa->hdr)) { error = ENOMEM; break; } wa->dumped++; } ifa_iter_finish(&it); return (error); } static int rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { if_t ifp; int error = 0; struct nl_parsed_ifa attrs = {}; error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs); if (error != 0) return (error); struct netlink_walkargs wa = { .so = nlp, .nw = npt->nw, .cred = nlp_get_cred(nlp), .family = attrs.ifa_family, .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI, .hdr.nlmsg_type = NL_RTM_NEWADDR, }; NL_LOG(LOG_DEBUG2, "Start dump"); if (attrs.ifa_index != 0) { ifp = ifnet_byindex(attrs.ifa_index); if (ifp == NULL) error = ENOENT; else error = 
dump_iface_addrs(&wa, ifp); } else { struct if_iter it; for (ifp = if_iter_start(&it); ifp != NULL; ifp = if_iter_next(&it)) { error = dump_iface_addrs(&wa, ifp); if (error != 0) break; } if_iter_finish(&it); } NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped); if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (error); } #ifdef INET static int handle_newaddr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs, if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt) { int plen = attrs->ifa_prefixlen; int if_flags = if_getflags(ifp); struct sockaddr_in *addr, *dst; if (plen > 32) { nlmsg_report_err_msg(npt, "invalid ifa_prefixlen"); return (EINVAL); }; if (if_flags & IFF_POINTOPOINT) { /* * Only P2P IFAs are allowed by the implementation. */ if (attrs->ifa_address == NULL || attrs->ifa_local == NULL) { nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS"); return (EINVAL); } addr = (struct sockaddr_in *)attrs->ifa_local; dst = (struct sockaddr_in *)attrs->ifa_address; } else { /* * Map the Netlink attributes to FreeBSD ifa layout. * If only IFA_ADDRESS or IFA_LOCAL is set OR * both are set to the same value => ifa is not p2p * and the attribute value contains interface address. * * Otherwise (both IFA_ADDRESS and IFA_LOCAL are set and * different), IFA_LOCAL contains an interface address and * IFA_ADDRESS contains peer address. */ addr = (struct sockaddr_in *)attrs->ifa_local; if (addr == NULL) addr = (struct sockaddr_in *)attrs->ifa_address; if (addr == NULL) { nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS"); return (EINVAL); } /* Generate broadcast address if not set */ if ((if_flags & IFF_BROADCAST) && attrs->ifa_broadcast == NULL) { uint32_t s_baddr; struct sockaddr_in *sin_brd; if (plen == 31) s_baddr = INADDR_BROADCAST; /* RFC 3021 */ else { uint32_t s_mask; s_mask = htonl(plen ? 
~((1 << (32 - plen)) - 1) : 0); s_baddr = addr->sin_addr.s_addr | ~s_mask; } sin_brd = (struct sockaddr_in *)npt_alloc(npt, sizeof(*sin_brd)); if (sin_brd == NULL) return (ENOMEM); sin_brd->sin_family = AF_INET; sin_brd->sin_len = sizeof(*sin_brd); sin_brd->sin_addr.s_addr = s_baddr; attrs->ifa_broadcast = (struct sockaddr *)sin_brd; } dst = (struct sockaddr_in *)attrs->ifa_broadcast; } struct sockaddr_in mask = { .sin_len = sizeof(struct sockaddr_in), .sin_family = AF_INET, .sin_addr.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0), }; struct in_aliasreq req = { .ifra_addr = *addr, .ifra_mask = mask, .ifra_vhid = attrs->ifaf_vhid, }; if (dst != NULL) req.ifra_dstaddr = *dst; return (in_control_ioctl(SIOCAIFADDR, &req, ifp, nlp_get_cred(nlp))); } static int handle_deladdr_inet(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs, if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt) { struct sockaddr *addr = attrs->ifa_local; if (addr == NULL) addr = attrs->ifa_address; if (addr == NULL) { nlmsg_report_err_msg(npt, "empty IFA_ADDRESS/IFA_LOCAL"); return (EINVAL); } struct ifreq req = { .ifr_addr = *addr }; return (in_control_ioctl(SIOCDIFADDR, &req, ifp, nlp_get_cred(nlp))); } #endif #ifdef INET6 static int handle_newaddr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs, if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt) { struct sockaddr_in6 *addr, *dst; if (attrs->ifa_prefixlen > 128) { nlmsg_report_err_msg(npt, "invalid ifa_prefixlen"); return (EINVAL); } /* * In IPv6 implementation, adding non-P2P address to the P2P interface * is allowed. */ addr = (struct sockaddr_in6 *)(attrs->ifa_local); dst = (struct sockaddr_in6 *)(attrs->ifa_address); if (addr == NULL) { addr = dst; dst = NULL; } else if (dst != NULL) { if (IN6_ARE_ADDR_EQUAL(&addr->sin6_addr, &dst->sin6_addr)) { /* * Sometimes Netlink users fills in both attributes * with the same address. It still means "non-p2p". 
*/ dst = NULL; } } if (addr == NULL) { nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS"); return (EINVAL); } uint32_t flags = nl_flags_to_in6(attrs->ifa_flags) | attrs->ifaf_flags; uint32_t pltime = 0, vltime = 0; if (attrs->ifa_cacheinfo != 0) { pltime = attrs->ifa_cacheinfo->ifa_prefered; vltime = attrs->ifa_cacheinfo->ifa_valid; } struct sockaddr_in6 mask = { .sin6_len = sizeof(struct sockaddr_in6), .sin6_family = AF_INET6, }; ip6_writemask(&mask.sin6_addr, attrs->ifa_prefixlen); struct in6_aliasreq req = { .ifra_addr = *addr, .ifra_prefixmask = mask, .ifra_flags = flags, .ifra_lifetime = { .ia6t_vltime = vltime, .ia6t_pltime = pltime }, .ifra_vhid = attrs->ifaf_vhid, }; if (dst != NULL) req.ifra_dstaddr = *dst; return (in6_control_ioctl(SIOCAIFADDR_IN6, &req, ifp, nlp_get_cred(nlp))); } static int handle_deladdr_inet6(struct nlmsghdr *hdr, struct nl_parsed_ifa *attrs, if_t ifp, struct nlpcb *nlp, struct nl_pstate *npt) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *)attrs->ifa_local; if (addr == NULL) addr = (struct sockaddr_in6 *)(attrs->ifa_address); if (addr == NULL) { nlmsg_report_err_msg(npt, "Empty IFA_LOCAL/IFA_ADDRESS"); return (EINVAL); } struct in6_ifreq req = { .ifr_addr = *addr }; return (in6_control_ioctl(SIOCDIFADDR_IN6, &req, ifp, nlp_get_cred(nlp))); } #endif static int rtnl_handle_addr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct epoch_tracker et; int error; struct nl_parsed_ifa attrs = {}; error = nl_parse_nlmsg(hdr, &ifa_parser, npt, &attrs); if (error != 0) return (error); NET_EPOCH_ENTER(et); if_t ifp = ifnet_byindex_ref(attrs.ifa_index); NET_EPOCH_EXIT(et); if (ifp == NULL) { nlmsg_report_err_msg(npt, "Unable to find interface with index %u", attrs.ifa_index); return (ENOENT); } int if_flags = if_getflags(ifp); #if defined(INET) || defined(INET6) bool new = hdr->nlmsg_type == NL_RTM_NEWADDR; #endif /* * TODO: Properly handle NLM_F_CREATE / NLM_F_EXCL. 
* The current ioctl-based KPI always does an implicit create-or-replace. * It is not possible to specify fine-grained options. */ switch (attrs.ifa_family) { #ifdef INET case AF_INET: if (new) error = handle_newaddr_inet(hdr, &attrs, ifp, nlp, npt); else error = handle_deladdr_inet(hdr, &attrs, ifp, nlp, npt); break; #endif #ifdef INET6 case AF_INET6: if (new) error = handle_newaddr_inet6(hdr, &attrs, ifp, nlp, npt); else error = handle_deladdr_inet6(hdr, &attrs, ifp, nlp, npt); break; #endif default: error = EAFNOSUPPORT; } if (error == 0 && !(if_flags & IFF_UP) && (if_getflags(ifp) & IFF_UP)) if_up(ifp); if_rele(ifp); return (error); } static void rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd) { struct nlmsghdr hdr = {}; struct nl_writer nw; uint32_t group = 0; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: group = RTNLGRP_IPV4_IFADDR; break; #endif #ifdef INET6 case AF_INET6: group = RTNLGRP_IPV6_IFADDR; break; #endif default: NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d", ifa->ifa_addr->sa_family); return; } if (!nl_has_listeners(NETLINK_ROUTE, group)) return; if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_ROUTE, group, false)) { NL_LOG(LOG_DEBUG, "error allocating group writer"); return; } hdr.nlmsg_type = (cmd == RTM_DELETE) ? 
NL_RTM_DELADDR : NL_RTM_NEWADDR; dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr); nlmsg_flush(&nw); } static void rtnl_handle_ifevent(if_t ifp, int nlmsg_type, int if_flags_mask) { struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type }; struct nl_writer nw; if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK)) return; if (!nl_writer_group(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK, false)) { NL_LOG(LOG_DEBUG, "error allocating group writer"); return; } dump_iface(&nw, ifp, &hdr, if_flags_mask); nlmsg_flush(&nw); } static void rtnl_handle_ifattach(void *arg, if_t ifp) { NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0); } static void rtnl_handle_ifdetach(void *arg, if_t ifp) { NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0); } static void rtnl_handle_iflink(void *arg, if_t ifp, int link_state __unused) { NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0); } void rtnl_handle_ifnet_event(if_t ifp, int if_flags_mask) { NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp)); rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask); } static const struct rtnl_cmd_handler cmd_handlers[] = { { .cmd = NL_RTM_GETLINK, .name = "RTM_GETLINK", .cb = &rtnl_handle_getlink, .flags = RTNL_F_NOEPOCH | RTNL_F_ALLOW_NONVNET_JAIL, }, { .cmd = NL_RTM_DELLINK, .name = "RTM_DELLINK", .cb = &rtnl_handle_dellink, .priv = PRIV_NET_IFDESTROY, .flags = RTNL_F_NOEPOCH, }, { .cmd = NL_RTM_NEWLINK, .name = "RTM_NEWLINK", .cb = &rtnl_handle_newlink, .priv = PRIV_NET_IFCREATE, .flags = RTNL_F_NOEPOCH, }, { .cmd = NL_RTM_GETADDR, .name = "RTM_GETADDR", .cb = &rtnl_handle_getaddr, .flags = RTNL_F_ALLOW_NONVNET_JAIL, }, { .cmd = NL_RTM_NEWADDR, .name = "RTM_NEWADDR", .cb = &rtnl_handle_addr, .priv = PRIV_NET_ADDIFADDR, .flags = RTNL_F_NOEPOCH, }, { .cmd = NL_RTM_DELADDR, .name = "RTM_DELADDR", .cb = &rtnl_handle_addr, .priv = PRIV_NET_DELIFADDR, .flags = RTNL_F_NOEPOCH, }, }; static const struct 
nlhdr_parser *all_parsers[] = { &ifmsg_parser, &ifa_parser, &ifa_fbsd_parser, }; void rtnl_iface_add_cloner(struct nl_cloner *cloner) { sx_xlock(&rtnl_cloner_lock); SLIST_INSERT_HEAD(&nl_cloners, cloner, next); sx_xunlock(&rtnl_cloner_lock); } void rtnl_iface_del_cloner(struct nl_cloner *cloner) { sx_xlock(&rtnl_cloner_lock); SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next); sx_xunlock(&rtnl_cloner_lock); } void rtnl_ifaces_init(void) { ifattach_event = EVENTHANDLER_REGISTER( ifnet_arrival_event, rtnl_handle_ifattach, NULL, EVENTHANDLER_PRI_ANY); ifdetach_event = EVENTHANDLER_REGISTER( ifnet_departure_event, rtnl_handle_ifdetach, NULL, EVENTHANDLER_PRI_ANY); ifaddr_event = EVENTHANDLER_REGISTER( rt_addrmsg, rtnl_handle_ifaddr, NULL, EVENTHANDLER_PRI_ANY); iflink_event = EVENTHANDLER_REGISTER( ifnet_link_event, rtnl_handle_iflink, NULL, EVENTHANDLER_PRI_ANY); NL_VERIFY_PARSERS(all_parsers); - rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); + rtnl_register_messages(cmd_handlers, nitems(cmd_handlers)); } void rtnl_ifaces_destroy(void) { EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event); EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event); EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event); EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event); } diff --git a/sys/netlink/route/neigh.c b/sys/netlink/route/neigh.c index c5003eddaba1..ec58c6140db8 100644 --- a/sys/netlink/route/neigh.c +++ b/sys/netlink/route/neigh.c @@ -1,601 +1,601 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* nd6.h requires this */ #include /* nd6 state machine */ #include /* scope deembedding */ #define DEBUG_MOD_NAME nl_neigh #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); static int lle_families[] = { AF_INET, AF_INET6 }; static eventhandler_tag lle_event_p; struct netlink_walkargs { struct nl_writer *nw; struct nlmsghdr hdr; struct nlpcb *so; if_t ifp; int family; int error; int count; int dumped; }; static int lle_state_to_nl_state(int family, struct llentry *lle) { int state = lle->ln_state; switch (family) { case AF_INET: if (lle->la_flags & (LLE_STATIC | LLE_IFADDR)) state = 1; switch (state) { case 0: /* ARP_LLINFO_INCOMPLETE */ return (NUD_INCOMPLETE); case 1: /* ARP_LLINFO_REACHABLE */ return (NUD_REACHABLE); case 2: /* ARP_LLINFO_VERIFY */ return (NUD_PROBE); } break; case AF_INET6: switch 
(state) { case ND6_LLINFO_INCOMPLETE: return (NUD_INCOMPLETE); case ND6_LLINFO_REACHABLE: return (NUD_REACHABLE); case ND6_LLINFO_STALE: return (NUD_STALE); case ND6_LLINFO_DELAY: return (NUD_DELAY); case ND6_LLINFO_PROBE: return (NUD_PROBE); } break; } return (NUD_NONE); } static uint32_t lle_flags_to_nl_flags(const struct llentry *lle) { uint32_t nl_flags = 0; if (lle->la_flags & LLE_IFADDR) nl_flags |= NTF_SELF; if (lle->la_flags & LLE_PUB) nl_flags |= NTF_PROXY; if (lle->la_flags & LLE_STATIC) nl_flags |= NTF_STICKY; if (lle->ln_router != 0) nl_flags |= NTF_ROUTER; return (nl_flags); } static uint32_t get_lle_next_ts(const struct llentry *lle) { if (lle->la_expire == 0) return (0); return (lle->la_expire + lle->lle_remtime / hz + time_second - time_uptime); } static int dump_lle_locked(struct llentry *lle, void *arg) { struct netlink_walkargs *wa = (struct netlink_walkargs *)arg; struct nlmsghdr *hdr = &wa->hdr; struct nl_writer *nw = wa->nw; struct ndmsg *ndm; #if defined(INET) || defined(INET6) union { struct in_addr in; struct in6_addr in6; } addr; #endif IF_DEBUG_LEVEL(LOG_DEBUG2) { char llebuf[NHOP_PRINT_BUFSIZE]; llentry_print_buf_lltable(lle, llebuf, sizeof(llebuf)); NL_LOG(LOG_DEBUG2, "dumping %s", llebuf); } if (!nlmsg_reply(nw, hdr, sizeof(struct ndmsg))) goto enomem; ndm = nlmsg_reserve_object(nw, struct ndmsg); ndm->ndm_family = wa->family; ndm->ndm_ifindex = if_getindex(wa->ifp); ndm->ndm_state = lle_state_to_nl_state(wa->family, lle); ndm->ndm_flags = lle_flags_to_nl_flags(lle); switch (wa->family) { #ifdef INET case AF_INET: addr.in = lle->r_l3addr.addr4; nlattr_add(nw, NDA_DST, 4, &addr); break; #endif #ifdef INET6 case AF_INET6: addr.in6 = lle->r_l3addr.addr6; in6_clearscope(&addr.in6); nlattr_add(nw, NDA_DST, 16, &addr); break; #endif } if (lle->r_flags & RLLE_VALID) { /* Has L2 */ int addrlen = if_getaddrlen(wa->ifp); nlattr_add(nw, NDA_LLADDR, addrlen, lle->ll_addr); } nlattr_add_u32(nw, NDA_PROBES, lle->la_asked); struct nda_cacheinfo 
*cache; cache = nlmsg_reserve_attr(nw, NDA_CACHEINFO, struct nda_cacheinfo); if (cache == NULL) goto enomem; /* TODO: provide confirmed/updated */ cache->ndm_refcnt = lle->lle_refcnt; int off = nlattr_add_nested(nw, NDA_FREEBSD); if (off != 0) { nlattr_add_u32(nw, NDAF_NEXT_STATE_TS, get_lle_next_ts(lle)); nlattr_set_len(nw, off); } if (nlmsg_end(nw)) return (0); enomem: NL_LOG(LOG_DEBUG, "unable to dump lle state (ENOMEM)"); nlmsg_abort(nw); return (ENOMEM); } static int dump_lle(struct lltable *llt, struct llentry *lle, void *arg) { int error; LLE_RLOCK(lle); error = dump_lle_locked(lle, arg); LLE_RUNLOCK(lle); return (error); } static bool dump_llt(struct lltable *llt, struct netlink_walkargs *wa) { lltable_foreach_lle(llt, dump_lle, wa); return (true); } static int dump_llts_iface(struct netlink_walkargs *wa, if_t ifp, int family) { int error = 0; wa->ifp = ifp; for (int i = 0; i < sizeof(lle_families) / sizeof(int); i++) { int fam = lle_families[i]; struct lltable *llt = lltable_get(ifp, fam); if (llt != NULL && (family == 0 || family == fam)) { wa->count++; wa->family = fam; if (!dump_llt(llt, wa)) { error = ENOMEM; break; } wa->dumped++; } } return (error); } static int dump_llts(struct netlink_walkargs *wa, if_t ifp, int family) { NL_LOG(LOG_DEBUG2, "Start dump ifp=%s family=%d", ifp ? 
if_name(ifp) : "NULL", family); wa->hdr.nlmsg_flags |= NLM_F_MULTI; if (ifp != NULL) { dump_llts_iface(wa, ifp, family); } else { struct if_iter it; for (ifp = if_iter_start(&it); ifp != NULL; ifp = if_iter_next(&it)) { dump_llts_iface(wa, ifp, family); } if_iter_finish(&it); } NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa->count, wa->dumped); if (!nlmsg_end_dump(wa->nw, wa->error, &wa->hdr)) { NL_LOG(LOG_DEBUG, "Unable to add new message"); return (ENOMEM); } return (0); } static int get_lle(struct netlink_walkargs *wa, if_t ifp, int family, struct sockaddr *dst) { struct lltable *llt = lltable_get(ifp, family); if (llt == NULL) return (ESRCH); struct llentry *lle = lla_lookup(llt, LLE_UNLOCKED, dst); if (lle == NULL) return (ESRCH); wa->ifp = ifp; wa->family = family; return (dump_lle(llt, lle, wa)); } static void set_scope6(struct sockaddr *sa, if_t ifp) { #ifdef INET6 if (sa != NULL && sa->sa_family == AF_INET6 && ifp != NULL) { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp)); } #endif } struct nl_parsed_neigh { struct sockaddr *nda_dst; struct ifnet *nda_ifp; struct nlattr *nda_lladdr; uint32_t ndaf_next_ts; uint32_t ndm_flags; uint16_t ndm_state; uint8_t ndm_family; }; #define _IN(_field) offsetof(struct ndmsg, _field) #define _OUT(_field) offsetof(struct nl_parsed_neigh, _field) static const struct nlattr_parser nla_p_neigh_fbsd[] = { { .type = NDAF_NEXT_STATE_TS, .off = _OUT(ndaf_next_ts), .cb = nlattr_get_uint32 }, }; NL_DECLARE_ATTR_PARSER(neigh_fbsd_parser, nla_p_neigh_fbsd); static const struct nlfield_parser nlf_p_neigh[] = { { .off_in = _IN(ndm_family), .off_out = _OUT(ndm_family), .cb = nlf_get_u8 }, { .off_in = _IN(ndm_flags), .off_out = _OUT(ndm_flags), .cb = nlf_get_u8_u32 }, { .off_in = _IN(ndm_state), .off_out = _OUT(ndm_state), .cb = nlf_get_u16 }, { .off_in = _IN(ndm_ifindex), .off_out = _OUT(nda_ifp), .cb = nlf_get_ifpz }, 
}; static const struct nlattr_parser nla_p_neigh[] = { { .type = NDA_DST, .off = _OUT(nda_dst), .cb = nlattr_get_ip }, { .type = NDA_LLADDR, .off = _OUT(nda_lladdr), .cb = nlattr_get_nla }, { .type = NDA_IFINDEX, .off = _OUT(nda_ifp), .cb = nlattr_get_ifp }, { .type = NDA_FLAGS_EXT, .off = _OUT(ndm_flags), .cb = nlattr_get_uint32 }, { .type = NDA_FREEBSD, .arg = &neigh_fbsd_parser, .cb = nlattr_get_nested }, }; #undef _IN #undef _OUT static bool post_p_neigh(void *_attrs, struct nl_pstate *npt __unused) { struct nl_parsed_neigh *attrs = (struct nl_parsed_neigh *)_attrs; set_scope6(attrs->nda_dst, attrs->nda_ifp); return (true); } NL_DECLARE_PARSER_EXT(ndmsg_parser, struct ndmsg, NULL, nlf_p_neigh, nla_p_neigh, post_p_neigh); /* * type=RTM_NEWNEIGH, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1661941473, pid=0}, * {ndm_family=AF_INET6, ndm_ifindex=if_nametoindex("enp0s31f6"), ndm_state=NUD_PERMANENT, ndm_flags=0, ndm_type=RTN_UNSPEC}, * [ * {{nla_len=20, nla_type=NDA_DST}, inet_pton(AF_INET6, "2a01:4f8:13a:70c::3")}, * {{nla_len=10, nla_type=NDA_LLADDR}, 20:4e:71:62:ae:f2}]}, iov_len=60} */ static int rtnl_handle_newneigh(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { int error; struct nl_parsed_neigh attrs = {}; error = nl_parse_nlmsg(hdr, &ndmsg_parser, npt, &attrs); if (error != 0) return (error); if (attrs.nda_ifp == NULL || attrs.nda_dst == NULL || attrs.nda_lladdr == NULL) { if (attrs.nda_ifp == NULL) NLMSG_REPORT_ERR_MSG(npt, "NDA_IFINDEX / ndm_ifindex not set"); if (attrs.nda_dst == NULL) NLMSG_REPORT_ERR_MSG(npt, "NDA_DST not set"); if (attrs.nda_lladdr == NULL) NLMSG_REPORT_ERR_MSG(npt, "NDA_LLADDR not set"); return (EINVAL); } if (attrs.nda_dst->sa_family != attrs.ndm_family) { NLMSG_REPORT_ERR_MSG(npt, "NDA_DST family (%d) is different from ndm_family (%d)", attrs.nda_dst->sa_family, attrs.ndm_family); return (EINVAL); } int addrlen = if_getaddrlen(attrs.nda_ifp); if (attrs.nda_lladdr->nla_len != sizeof(struct nlattr) + 
addrlen) { NLMSG_REPORT_ERR_MSG(npt, "NDA_LLADDR address length (%d) is different from expected (%d)", (int)attrs.nda_lladdr->nla_len - (int)sizeof(struct nlattr), addrlen); return (EINVAL); } const uint16_t supported_flags = NTF_PROXY | NTF_STICKY; if ((attrs.ndm_flags & supported_flags) != attrs.ndm_flags) { NLMSG_REPORT_ERR_MSG(npt, "ndm_flags %X not supported", attrs.ndm_flags &~ supported_flags); return (ENOTSUP); } /* Replacement requires new entry creation anyway */ if ((hdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_REPLACE)) == 0) return (ENOTSUP); struct lltable *llt = lltable_get(attrs.nda_ifp, attrs.ndm_family); if (llt == NULL) return (EAFNOSUPPORT); uint8_t linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize = sizeof(linkhdr); int lladdr_off = 0; if (lltable_calc_llheader(attrs.nda_ifp, attrs.ndm_family, (char *)(attrs.nda_lladdr + 1), linkhdr, &linkhdrsize, &lladdr_off) != 0) { NLMSG_REPORT_ERR_MSG(npt, "unable to calculate lle prepend data"); return (EINVAL); } int lle_flags = (attrs.ndm_flags & NTF_PROXY) ? 
LLE_PUB : 0; if (attrs.ndm_flags & NTF_STICKY) lle_flags |= LLE_STATIC; struct llentry *lle = lltable_alloc_entry(llt, lle_flags, attrs.nda_dst); if (lle == NULL) return (ENOMEM); lltable_set_entry_addr(attrs.nda_ifp, lle, linkhdr, linkhdrsize, lladdr_off); if (attrs.ndm_flags & NTF_STICKY) lle->la_expire = 0; else lle->la_expire = attrs.ndaf_next_ts - time_second + time_uptime; /* llentry created, try to insert or update */ IF_AFDATA_WLOCK(attrs.nda_ifp); LLE_WLOCK(lle); struct llentry *lle_tmp = lla_lookup(llt, LLE_EXCLUSIVE, attrs.nda_dst); if (lle_tmp != NULL) { error = EEXIST; if (hdr->nlmsg_flags & NLM_F_REPLACE) { error = EPERM; if ((lle_tmp->la_flags & LLE_IFADDR) == 0) { error = 0; /* success */ lltable_unlink_entry(llt, lle_tmp); llentry_free(lle_tmp); lle_tmp = NULL; lltable_link_entry(llt, lle); } } if (lle_tmp) LLE_WUNLOCK(lle_tmp); } else { if (hdr->nlmsg_flags & NLM_F_CREATE) lltable_link_entry(llt, lle); else error = ENOENT; } IF_AFDATA_WUNLOCK(attrs.nda_ifp); if (error != 0) { /* throw away the newly allocated llentry */ llentry_free(lle); return (error); } /* XXX: We're inside epoch */ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED); LLE_WUNLOCK(lle); llt->llt_post_resolved(llt, lle); return (0); } static int rtnl_handle_delneigh(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { int error; struct nl_parsed_neigh attrs = {}; error = nl_parse_nlmsg(hdr, &ndmsg_parser, npt, &attrs); if (error != 0) return (error); if (attrs.nda_dst == NULL) { NLMSG_REPORT_ERR_MSG(npt, "NDA_DST not set"); return (EINVAL); } if (attrs.nda_ifp == NULL) { NLMSG_REPORT_ERR_MSG(npt, "no ifindex provided"); return (EINVAL); } struct lltable *llt = lltable_get(attrs.nda_ifp, attrs.ndm_family); if (llt == NULL) return (EAFNOSUPPORT); return (lltable_delete_addr(llt, 0, attrs.nda_dst)); } static int rtnl_handle_getneigh(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { int error; struct nl_parsed_neigh attrs = {}; error = 
nl_parse_nlmsg(hdr, &ndmsg_parser, npt, &attrs); if (error != 0) return (error); if (attrs.nda_dst != NULL && attrs.nda_ifp == NULL) { NLMSG_REPORT_ERR_MSG(npt, "has NDA_DST but no ifindex provided"); return (EINVAL); } struct netlink_walkargs wa = { .so = nlp, .nw = npt->nw, .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags, .hdr.nlmsg_type = NL_RTM_NEWNEIGH, }; if (attrs.nda_dst == NULL) error = dump_llts(&wa, attrs.nda_ifp, attrs.ndm_family); else error = get_lle(&wa, attrs.nda_ifp, attrs.ndm_family, attrs.nda_dst); return (error); } static const struct rtnl_cmd_handler cmd_handlers[] = { { .cmd = NL_RTM_NEWNEIGH, .name = "RTM_NEWNEIGH", .cb = &rtnl_handle_newneigh, .priv = PRIV_NET_ROUTE, }, { .cmd = NL_RTM_DELNEIGH, .name = "RTM_DELNEIGH", .cb = &rtnl_handle_delneigh, .priv = PRIV_NET_ROUTE, }, { .cmd = NL_RTM_GETNEIGH, .name = "RTM_GETNEIGH", .cb = &rtnl_handle_getneigh, } }; static void rtnl_lle_event(void *arg __unused, struct llentry *lle, int evt) { struct nl_writer nw; if_t ifp; int family; LLE_WLOCK_ASSERT(lle); ifp = lltable_get_ifp(lle->lle_tbl); family = lltable_get_af(lle->lle_tbl); if (family != AF_INET && family != AF_INET6) return; int nlmsgs_type = evt == LLENTRY_RESOLVED ? 
NL_RTM_NEWNEIGH : NL_RTM_DELNEIGH; if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEIGH, false)) { NL_LOG(LOG_DEBUG, "error allocating group writer"); return; } struct netlink_walkargs wa = { .hdr.nlmsg_type = nlmsgs_type, .nw = &nw, .ifp = ifp, .family = family, }; dump_lle_locked(lle, &wa); nlmsg_flush(&nw); } static const struct nlhdr_parser *all_parsers[] = { &ndmsg_parser, &neigh_fbsd_parser }; void rtnl_neighs_init(void) { NL_VERIFY_PARSERS(all_parsers); - rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); + rtnl_register_messages(cmd_handlers, nitems(cmd_handlers)); lle_event_p = EVENTHANDLER_REGISTER(lle_event, rtnl_lle_event, NULL, EVENTHANDLER_PRI_ANY); } void rtnl_neighs_destroy(void) { EVENTHANDLER_DEREGISTER(lle_event, lle_event_p); } diff --git a/sys/netlink/route/nexthop.c b/sys/netlink/route/nexthop.c index 90ed3497047e..03f1a57fd1e4 100644 --- a/sys/netlink/route/nexthop.c +++ b/sys/netlink/route/nexthop.c @@ -1,1123 +1,1123 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_nhop #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); /* * This file contains the logic to maintain kernel nexthops and * nexhop groups based om the data provided by the user. * * Kernel stores (nearly) all of the routing data in the nexthops, * including the prefix-specific flags (NHF_HOST and NHF_DEFAULT). * * Netlink API provides higher-level abstraction for the user. Each * user-created nexthop may map to multiple kernel nexthops. * * The following variations require separate kernel nexthop to be * created: * * prefix flags (NHF_HOST, NHF_DEFAULT) * * using IPv6 gateway for IPv4 routes * * different fibnum * * These kernel nexthops have the lifetime bound to the lifetime of * the user_nhop object. They are not collected until user requests * to delete the created user_nhop. 
* */ struct user_nhop { uint32_t un_idx; /* Userland-provided index */ uint32_t un_fibfam; /* fibnum+af(as highest byte) */ uint8_t un_protocol; /* protocol that install the record */ struct nhop_object *un_nhop; /* "production" nexthop */ struct nhop_object *un_nhop_src; /* nexthop to copy from */ struct weightened_nhop *un_nhgrp_src; /* nexthops for nhg */ uint32_t un_nhgrp_count; /* number of nexthops */ struct user_nhop *un_next; /* next item in hash chain */ struct user_nhop *un_nextchild; /* master -> children */ struct epoch_context un_epoch_ctx; /* epoch ctl helper */ }; /* produce hash value for an object */ #define unhop_hash_obj(_obj) (hash_unhop(_obj)) /* compare two objects */ #define unhop_cmp(_one, _two) (cmp_unhop(_one, _two)) /* next object accessor */ #define unhop_next(_obj) (_obj)->un_next CHT_SLIST_DEFINE(unhop, struct user_nhop); struct unhop_ctl { struct unhop_head un_head; struct rmlock un_lock; }; #define UN_LOCK_INIT(_ctl) rm_init(&(_ctl)->un_lock, "unhop_ctl") #define UN_TRACKER struct rm_priotracker un_tracker #define UN_RLOCK(_ctl) rm_rlock(&((_ctl)->un_lock), &un_tracker) #define UN_RUNLOCK(_ctl) rm_runlock(&((_ctl)->un_lock), &un_tracker) #define UN_WLOCK(_ctl) rm_wlock(&(_ctl)->un_lock); #define UN_WUNLOCK(_ctl) rm_wunlock(&(_ctl)->un_lock); VNET_DEFINE_STATIC(struct unhop_ctl *, un_ctl) = NULL; #define V_un_ctl VNET(un_ctl) static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size); static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b); static unsigned int hash_unhop(const struct user_nhop *obj); static void destroy_unhop(struct user_nhop *unhop); static struct nhop_object *clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags); static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b) { return (a->un_idx == b->un_idx && a->un_fibfam == b->un_fibfam); } /* * Hash callback: calculate hash of an object */ static unsigned int hash_unhop(const struct 
user_nhop *obj) { return (obj->un_idx ^ obj->un_fibfam); } #define UNHOP_IS_MASTER(_unhop) ((_unhop)->un_fibfam == 0) /* * Factory interface for creating matching kernel nexthops/nexthop groups * * @uidx: userland nexhop index used to create the nexthop * @fibnum: fibnum nexthop will be used in * @family: upper family nexthop will be used in * @nh_flags: desired nexthop prefix flags * @perror: pointer to store error to * * Returns referenced nexthop linked to @fibnum/@family rib on success. */ struct nhop_object * nl_find_nhop(uint32_t fibnum, int family, uint32_t uidx, int nh_flags, int *perror) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); UN_TRACKER; if (__predict_false(ctl == NULL)) return (NULL); struct user_nhop key= { .un_idx = uidx, .un_fibfam = fibnum | ((uint32_t)family) << 24, }; struct user_nhop *unhop; nh_flags = nh_flags & (NHF_HOST | NHF_DEFAULT); if (__predict_false(family == 0)) return (NULL); UN_RLOCK(ctl); CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop != NULL) { struct nhop_object *nh = unhop->un_nhop; UN_RLOCK(ctl); *perror = 0; nhop_ref_any(nh); return (nh); } /* * Exact nexthop not found. Search for template nexthop to clone from. 
*/ key.un_fibfam = 0; CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop == NULL) { UN_RUNLOCK(ctl); *perror = ESRCH; return (NULL); } UN_RUNLOCK(ctl); /* Create entry to insert first */ struct user_nhop *un_new, *un_tmp; un_new = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); if (un_new == NULL) { *perror = ENOMEM; return (NULL); } un_new->un_idx = uidx; un_new->un_fibfam = fibnum | ((uint32_t)family) << 24; /* Relying on epoch to protect unhop here */ un_new->un_nhop = clone_unhop(unhop, fibnum, family, nh_flags); if (un_new->un_nhop == NULL) { free(un_new, M_NETLINK); *perror = ENOMEM; return (NULL); } /* Insert back and report */ UN_WLOCK(ctl); /* First, find template record once again */ CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop == NULL) { /* Someone deleted the nexthop during the call */ UN_WUNLOCK(ctl); *perror = ESRCH; destroy_unhop(un_new); return (NULL); } /* Second, check the direct match */ CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, un_new, un_tmp); struct nhop_object *nh; if (un_tmp != NULL) { /* Another thread already created the desired nextop, use it */ nh = un_tmp->un_nhop; } else { /* Finally, insert the new nexthop and link it to the primary */ nh = un_new->un_nhop; CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, un_new); un_new->un_nextchild = unhop->un_nextchild; unhop->un_nextchild = un_new; un_new = NULL; NL_LOG(LOG_DEBUG2, "linked cloned nexthop %p", nh); } UN_WUNLOCK(ctl); if (un_new != NULL) destroy_unhop(un_new); *perror = 0; nhop_ref_any(nh); return (nh); } static struct user_nhop * nl_find_base_unhop(struct unhop_ctl *ctl, uint32_t uidx) { struct user_nhop key= { .un_idx = uidx }; struct user_nhop *unhop = NULL; UN_TRACKER; UN_RLOCK(ctl); CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); UN_RUNLOCK(ctl); return (unhop); } #define MAX_STACK_NHOPS 4 static struct nhop_object * clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags) { #ifdef 
ROUTE_MPATH const struct weightened_nhop *wn; struct weightened_nhop *wn_new, wn_base[MAX_STACK_NHOPS]; uint32_t num_nhops; #endif struct nhop_object *nh = NULL; int error; if (unhop->un_nhop_src != NULL) { IF_DEBUG_LEVEL(LOG_DEBUG2) { char nhbuf[NHOP_PRINT_BUFSIZE]; nhop_print_buf_any(unhop->un_nhop_src, nhbuf, sizeof(nhbuf)); FIB_NH_LOG(LOG_DEBUG2, unhop->un_nhop_src, "cloning nhop %s -> %u.%u flags 0x%X", nhbuf, fibnum, family, nh_flags); } struct nhop_object *nh; nh = nhop_alloc(fibnum, AF_UNSPEC); if (nh == NULL) return (NULL); nhop_copy(nh, unhop->un_nhop_src); /* Check that nexthop gateway is compatible with the new family */ if (!nhop_set_upper_family(nh, family)) { nhop_free(nh); return (NULL); } nhop_set_uidx(nh, unhop->un_idx); nhop_set_pxtype_flag(nh, nh_flags); return (nhop_get_nhop(nh, &error)); } #ifdef ROUTE_MPATH wn = unhop->un_nhgrp_src; num_nhops = unhop->un_nhgrp_count; if (num_nhops > MAX_STACK_NHOPS) { wn_new = malloc(num_nhops * sizeof(struct weightened_nhop), M_TEMP, M_NOWAIT); if (wn_new == NULL) return (NULL); } else wn_new = wn_base; for (int i = 0; i < num_nhops; i++) { uint32_t uidx = nhop_get_uidx(wn[i].nh); MPASS(uidx != 0); wn_new[i].nh = nl_find_nhop(fibnum, family, uidx, nh_flags, &error); if (error != 0) break; wn_new[i].weight = wn[i].weight; } if (error == 0) { struct rib_head *rh = nhop_get_rh(wn_new[0].nh); struct nhgrp_object *nhg; error = nhgrp_get_group(rh, wn_new, num_nhops, unhop->un_idx, &nhg); nh = (struct nhop_object *)nhg; } if (wn_new != wn_base) free(wn_new, M_TEMP); #endif return (nh); } static void destroy_unhop(struct user_nhop *unhop) { if (unhop->un_nhop != NULL) nhop_free_any(unhop->un_nhop); if (unhop->un_nhop_src != NULL) nhop_free_any(unhop->un_nhop_src); free(unhop, M_NETLINK); } static void destroy_unhop_epoch(epoch_context_t ctx) { struct user_nhop *unhop; unhop = __containerof(ctx, struct user_nhop, un_epoch_ctx); destroy_unhop(unhop); } static uint32_t find_spare_uidx(struct unhop_ctl *ctl) { struct 
user_nhop *unhop, key = {}; uint32_t uidx = 0; UN_TRACKER; UN_RLOCK(ctl); /* This should return spare uid with 75% of 65k used in ~99/100 cases */ for (int i = 0; i < 16; i++) { key.un_idx = (arc4random() % 65536) + 65536 * 4; CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop == NULL) { uidx = key.un_idx; break; } } UN_RUNLOCK(ctl); return (uidx); } /* * Actual netlink code */ struct netlink_walkargs { struct nl_writer *nw; struct nlmsghdr hdr; struct nlpcb *so; int family; int error; int count; int dumped; }; #define ENOMEM_IF_NULL(_v) if ((_v) == NULL) goto enomem static bool dump_nhgrp(const struct user_nhop *unhop, struct nlmsghdr *hdr, struct nl_writer *nw) { if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) goto enomem; struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); nhm->nh_family = AF_UNSPEC; nhm->nh_scope = 0; nhm->nh_protocol = unhop->un_protocol; nhm->nh_flags = 0; nlattr_add_u32(nw, NHA_ID, unhop->un_idx); nlattr_add_u16(nw, NHA_GROUP_TYPE, NEXTHOP_GRP_TYPE_MPATH); struct weightened_nhop *wn = unhop->un_nhgrp_src; uint32_t num_nhops = unhop->un_nhgrp_count; /* TODO: a better API? 
*/ int nla_len = sizeof(struct nlattr); nla_len += NETLINK_ALIGN(num_nhops * sizeof(struct nexthop_grp)); struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); if (nla == NULL) goto enomem; nla->nla_type = NHA_GROUP; nla->nla_len = nla_len; for (int i = 0; i < num_nhops; i++) { struct nexthop_grp *grp = &((struct nexthop_grp *)(nla + 1))[i]; grp->id = nhop_get_uidx(wn[i].nh); grp->weight = wn[i].weight; grp->resvd1 = 0; grp->resvd2 = 0; } if (nlmsg_end(nw)) return (true); enomem: NL_LOG(LOG_DEBUG, "error: unable to allocate attribute memory"); nlmsg_abort(nw); return (false); } static bool dump_nhop(const struct nhop_object *nh, uint32_t uidx, struct nlmsghdr *hdr, struct nl_writer *nw) { if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) goto enomem; struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); ENOMEM_IF_NULL(nhm); nhm->nh_family = nhop_get_neigh_family(nh); nhm->nh_scope = 0; // XXX: what's that? nhm->nh_protocol = nhop_get_origin(nh); nhm->nh_flags = 0; if (uidx != 0) nlattr_add_u32(nw, NHA_ID, uidx); if (nh->nh_flags & NHF_BLACKHOLE) { nlattr_add_flag(nw, NHA_BLACKHOLE); goto done; } nlattr_add_u32(nw, NHA_OIF, if_getindex(nh->nh_ifp)); switch (nh->gw_sa.sa_family) { #ifdef INET case AF_INET: nlattr_add(nw, NHA_GATEWAY, 4, &nh->gw4_sa.sin_addr); break; #endif #ifdef INET6 case AF_INET6: { struct in6_addr addr = nh->gw6_sa.sin6_addr; in6_clearscope(&addr); nlattr_add(nw, NHA_GATEWAY, 16, &addr); break; } #endif } int off = nlattr_add_nested(nw, NHA_FREEBSD); if (off != 0) { nlattr_add_u32(nw, NHAF_AIF, if_getindex(nh->nh_aifp)); if (uidx == 0) { nlattr_add_u32(nw, NHAF_KID, nhop_get_idx(nh)); nlattr_add_u32(nw, NHAF_FAMILY, nhop_get_upper_family(nh)); nlattr_add_u32(nw, NHAF_TABLE, nhop_get_fibnum(nh)); } nlattr_set_len(nw, off); } done: if (nlmsg_end(nw)) return (true); enomem: nlmsg_abort(nw); return (false); } static void dump_unhop(const struct user_nhop *unhop, struct nlmsghdr *hdr, struct nl_writer *nw) { if (unhop->un_nhop_src 
!= NULL) dump_nhop(unhop->un_nhop_src, unhop->un_idx, hdr, nw); else dump_nhgrp(unhop, hdr, nw); }

/*
 * Unlinks the user nexthop identified by @uidx, together with every child
 * reachable through un_nextchild, from the @ctl hash and reports the
 * deletion to the RTNLGRP_NEXTHOP group.
 *
 * @hdr supplies the pid/seq/flags copied into the notification header.
 *
 * Returns 0 on success, ENOENT when @uidx is not in the hash, or ENOMEM
 * when the notification writer cannot be set up.  In the ENOMEM case the
 * nexthops have already been unlinked, so they are still scheduled for
 * destruction via NET_EPOCH_CALL() instead of being leaked.
 */
static int
delete_unhop(struct unhop_ctl *ctl, struct nlmsghdr *hdr, uint32_t uidx)
{
	struct user_nhop *unhop_ret, *unhop_base, *unhop_chain;
	struct nl_writer nw;
	struct user_nhop key = { .un_idx = uidx };
	int error = 0;

	UN_WLOCK(ctl);

	CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop_base);

	if (unhop_base != NULL) {
		CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_base, unhop_ret);
		IF_DEBUG_LEVEL(LOG_DEBUG2) {
			char nhbuf[NHOP_PRINT_BUFSIZE];
			nhop_print_buf_any(unhop_base->un_nhop, nhbuf, sizeof(nhbuf));
			FIB_NH_LOG(LOG_DEBUG3, unhop_base->un_nhop,
			    "removed base nhop %u: %s", uidx, nhbuf);
		}
		/* Unlink all child nexthops as well, keeping the chain intact */
		unhop_chain = unhop_base->un_nextchild;
		while (unhop_chain != NULL) {
			CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_chain,
			    unhop_ret);
			MPASS(unhop_chain == unhop_ret);
			IF_DEBUG_LEVEL(LOG_DEBUG3) {
				char nhbuf[NHOP_PRINT_BUFSIZE];
				nhop_print_buf_any(unhop_chain->un_nhop, nhbuf,
				    sizeof(nhbuf));
				FIB_NH_LOG(LOG_DEBUG3, unhop_chain->un_nhop,
				    "removed child nhop %u: %s", uidx, nhbuf);
			}
			unhop_chain = unhop_chain->un_nextchild;
		}
	}

	UN_WUNLOCK(ctl);

	if (unhop_base == NULL) {
		NL_LOG(LOG_DEBUG, "unable to find unhop %u", uidx);
		return (ENOENT);
	}

	/* Report nexthop deletion */
	struct netlink_walkargs wa = {
		.hdr.nlmsg_pid = hdr->nlmsg_pid,
		.hdr.nlmsg_seq = hdr->nlmsg_seq,
		.hdr.nlmsg_flags = hdr->nlmsg_flags,
		.hdr.nlmsg_type = NL_RTM_DELNEXTHOP,
	};

	if (nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP,
	    false)) {
		dump_unhop(unhop_base, &wa.hdr, &nw);
		nlmsg_flush(&nw);
	} else {
		NL_LOG(LOG_DEBUG, "error allocating message writer");
		error = ENOMEM;
	}

	/*
	 * The chain is no longer reachable from the hash, so this is the only
	 * place left that can release it: do so even when the notification
	 * could not be sent.
	 */
	while (unhop_base != NULL) {
		unhop_chain = unhop_base->un_nextchild;
		NET_EPOCH_CALL(destroy_unhop_epoch, &unhop_base->un_epoch_ctx);
		unhop_base = unhop_chain;
	}

	return (error);
}

static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size) { void *new_ptr = NULL; size_t alloc_size; if
(new_size == 0) return; if (new_size != 0) { alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_size); new_ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); if (new_ptr == NULL) return; } NL_LOG(LOG_DEBUG, "resizing hash: %u -> %u", ctl->un_head.hash_size, new_size); UN_WLOCK(ctl); if (new_ptr != NULL) { CHT_SLIST_RESIZE(&ctl->un_head, unhop, new_ptr, new_size); } UN_WUNLOCK(ctl); if (new_ptr != NULL) free(new_ptr, M_NETLINK); } static bool __noinline vnet_init_unhops(void) { uint32_t num_buckets = 16; size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets); struct unhop_ctl *ctl = malloc(sizeof(struct unhop_ctl), M_NETLINK, M_NOWAIT | M_ZERO); if (ctl == NULL) return (false); void *ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); if (ptr == NULL) { free(ctl, M_NETLINK); return (false); } CHT_SLIST_INIT(&ctl->un_head, ptr, num_buckets); UN_LOCK_INIT(ctl); if (!atomic_cmpset_ptr((uintptr_t *)&V_un_ctl, (uintptr_t)NULL, (uintptr_t)ctl)) { free(ptr, M_NETLINK); free(ctl, M_NETLINK); } if (atomic_load_ptr(&V_un_ctl) == NULL) return (false); NL_LOG(LOG_NOTICE, "UNHOPS init done"); return (true); } static void vnet_destroy_unhops(const void *unused __unused) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); struct user_nhop *unhop, *tmp; if (ctl == NULL) return; V_un_ctl = NULL; /* Wait till all unhop users finish their reads */ NET_EPOCH_WAIT(); UN_WLOCK(ctl); CHT_SLIST_FOREACH_SAFE(&ctl->un_head, unhop, unhop, tmp) { destroy_unhop(unhop); } CHT_SLIST_FOREACH_SAFE_END; UN_WUNLOCK(ctl); free(ctl->un_head.ptr, M_NETLINK); free(ctl, M_NETLINK); } VNET_SYSUNINIT(vnet_destroy_unhops, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_destroy_unhops, NULL); static int nlattr_get_nhg(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) { int error = 0; /* Verify attribute correctness */ struct nexthop_grp *grp = NLA_DATA(nla); int data_len = NLA_DATA_LEN(nla); int count = data_len / sizeof(*grp); if (count == 0 || (count * sizeof(*grp) != data_len)) { 
NL_LOG(LOG_DEBUG, "Invalid length for RTA_GROUP: %d", data_len); return (EINVAL); } *((struct nlattr **)target) = nla; return (error); } static void set_scope6(struct sockaddr *sa, if_t ifp) { #ifdef INET6 if (sa != NULL && sa->sa_family == AF_INET6 && ifp != NULL) { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp)); } #endif } struct nl_parsed_nhop { uint32_t nha_id; uint8_t nha_blackhole; uint8_t nha_groups; uint8_t nhaf_knhops; uint8_t nhaf_family; struct ifnet *nha_oif; struct sockaddr *nha_gw; struct nlattr *nha_group; uint8_t nh_family; uint8_t nh_protocol; uint32_t nhaf_table; uint32_t nhaf_kid; uint32_t nhaf_aif; }; #define _IN(_field) offsetof(struct nhmsg, _field) #define _OUT(_field) offsetof(struct nl_parsed_nhop, _field) static struct nlattr_parser nla_p_nh_fbsd[] = { { .type = NHAF_KNHOPS, .off = _OUT(nhaf_knhops), .cb = nlattr_get_flag }, { .type = NHAF_TABLE, .off = _OUT(nhaf_table), .cb = nlattr_get_uint32 }, { .type = NHAF_FAMILY, .off = _OUT(nhaf_family), .cb = nlattr_get_uint8 }, { .type = NHAF_KID, .off = _OUT(nhaf_kid), .cb = nlattr_get_uint32 }, { .type = NHAF_AIF, .off = _OUT(nhaf_aif), .cb = nlattr_get_uint32 }, }; NL_DECLARE_ATTR_PARSER(nh_fbsd_parser, nla_p_nh_fbsd); static const struct nlfield_parser nlf_p_nh[] = { { .off_in = _IN(nh_family), .off_out = _OUT(nh_family), .cb = nlf_get_u8 }, { .off_in = _IN(nh_protocol), .off_out = _OUT(nh_protocol), .cb = nlf_get_u8 }, }; static const struct nlattr_parser nla_p_nh[] = { { .type = NHA_ID, .off = _OUT(nha_id), .cb = nlattr_get_uint32 }, { .type = NHA_GROUP, .off = _OUT(nha_group), .cb = nlattr_get_nhg }, { .type = NHA_BLACKHOLE, .off = _OUT(nha_blackhole), .cb = nlattr_get_flag }, { .type = NHA_OIF, .off = _OUT(nha_oif), .cb = nlattr_get_ifp }, { .type = NHA_GATEWAY, .off = _OUT(nha_gw), .cb = nlattr_get_ip }, { .type = NHA_GROUPS, .off = _OUT(nha_groups), .cb = nlattr_get_flag }, { 
.type = NHA_FREEBSD, .arg = &nh_fbsd_parser, .cb = nlattr_get_nested }, }; #undef _IN #undef _OUT static bool post_p_nh(void *_attrs, struct nl_pstate *npt) { struct nl_parsed_nhop *attrs = (struct nl_parsed_nhop *)_attrs; set_scope6(attrs->nha_gw, attrs->nha_oif); return (true); } NL_DECLARE_PARSER_EXT(nhmsg_parser, struct nhmsg, NULL, nlf_p_nh, nla_p_nh, post_p_nh); static bool eligible_nhg(const struct nhop_object *nh) { return (nh->nh_flags & NHF_GATEWAY); } static int newnhg(struct unhop_ctl *ctl, struct nl_parsed_nhop *attrs, struct user_nhop *unhop) { struct nexthop_grp *grp = NLA_DATA(attrs->nha_group); int count = NLA_DATA_LEN(attrs->nha_group) / sizeof(*grp); struct weightened_nhop *wn; wn = malloc(sizeof(*wn) * count, M_NETLINK, M_NOWAIT | M_ZERO); if (wn == NULL) return (ENOMEM); for (int i = 0; i < count; i++) { struct user_nhop *unhop; unhop = nl_find_base_unhop(ctl, grp[i].id); if (unhop == NULL) { NL_LOG(LOG_DEBUG, "unable to find uidx %u", grp[i].id); free(wn, M_NETLINK); return (ESRCH); } else if (unhop->un_nhop_src == NULL) { NL_LOG(LOG_DEBUG, "uidx %u is a group, nested group unsupported", grp[i].id); free(wn, M_NETLINK); return (ENOTSUP); } else if (!eligible_nhg(unhop->un_nhop_src)) { NL_LOG(LOG_DEBUG, "uidx %u nhop is not mpath-eligible", grp[i].id); free(wn, M_NETLINK); return (ENOTSUP); } /* * TODO: consider more rigid eligibility checks: * restrict nexthops with the same gateway */ wn[i].nh = unhop->un_nhop_src; wn[i].weight = grp[i].weight; } unhop->un_nhgrp_src = wn; unhop->un_nhgrp_count = count; return (0); } /* * Sets nexthop @nh gateway specified by @gw. * If gateway is IPv6 link-local, alters @gw to include scopeid equal to * @ifp ifindex. * Returns 0 on success or errno. 
*/ int nl_set_nexthop_gw(struct nhop_object *nh, struct sockaddr *gw, if_t ifp, struct nl_pstate *npt) { #ifdef INET6 if (gw->sa_family == AF_INET6) { struct sockaddr_in6 *gw6 = (struct sockaddr_in6 *)gw; if (IN6_IS_ADDR_LINKLOCAL(&gw6->sin6_addr)) { if (ifp == NULL) { NLMSG_REPORT_ERR_MSG(npt, "interface not set"); return (EINVAL); } in6_set_unicast_scopeid(&gw6->sin6_addr, if_getindex(ifp)); } } #endif nhop_set_gw(nh, gw, true); return (0); } static int newnhop(struct nl_parsed_nhop *attrs, struct user_nhop *unhop, struct nl_pstate *npt) { struct ifaddr *ifa = NULL; struct nhop_object *nh; int error; if (!attrs->nha_blackhole) { if (attrs->nha_gw == NULL) { NLMSG_REPORT_ERR_MSG(npt, "missing NHA_GATEWAY"); return (EINVAL); } if (attrs->nha_oif == NULL) { NLMSG_REPORT_ERR_MSG(npt, "missing NHA_OIF"); return (EINVAL); } if (ifa == NULL) ifa = ifaof_ifpforaddr(attrs->nha_gw, attrs->nha_oif); if (ifa == NULL) { NLMSG_REPORT_ERR_MSG(npt, "Unable to determine default source IP"); return (EINVAL); } } int family = attrs->nha_gw != NULL ? 
attrs->nha_gw->sa_family : attrs->nh_family; nh = nhop_alloc(RT_DEFAULT_FIB, family); if (nh == NULL) { NL_LOG(LOG_DEBUG, "Unable to allocate nexthop"); return (ENOMEM); } nhop_set_uidx(nh, attrs->nha_id); nhop_set_origin(nh, attrs->nh_protocol); if (attrs->nha_blackhole) nhop_set_blackhole(nh, NHF_BLACKHOLE); else { error = nl_set_nexthop_gw(nh, attrs->nha_gw, attrs->nha_oif, npt); if (error != 0) { nhop_free(nh); return (error); } nhop_set_transmit_ifp(nh, attrs->nha_oif); nhop_set_src(nh, ifa); } error = nhop_get_unlinked(nh); if (error != 0) { NL_LOG(LOG_DEBUG, "unable to finalize nexthop"); return (error); } IF_DEBUG_LEVEL(LOG_DEBUG2) { char nhbuf[NHOP_PRINT_BUFSIZE]; nhop_print_buf(nh, nhbuf, sizeof(nhbuf)); NL_LOG(LOG_DEBUG2, "Adding unhop %u: %s", attrs->nha_id, nhbuf); } unhop->un_nhop_src = nh; return (0); } static int rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct nl_writer nw; struct user_nhop *unhop; int error; if ((__predict_false(V_un_ctl == NULL)) && (!vnet_init_unhops())) return (ENOMEM); struct unhop_ctl *ctl = V_un_ctl; struct nl_parsed_nhop attrs = {}; error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); if (error != 0) return (error); /* * Get valid nha_id. Treat nha_id == 0 (auto-assignment) as a second-class * citizen. */ if (attrs.nha_id == 0) { attrs.nha_id = find_spare_uidx(ctl); if (attrs.nha_id == 0) { NL_LOG(LOG_DEBUG, "Unable to get spare uidx"); return (ENOSPC); } } NL_LOG(LOG_DEBUG, "IFINDEX %d", attrs.nha_oif ? 
if_getindex(attrs.nha_oif) : 0); unhop = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); if (unhop == NULL) { NL_LOG(LOG_DEBUG, "Unable to allocate user_nhop"); return (ENOMEM); } unhop->un_idx = attrs.nha_id; unhop->un_protocol = attrs.nh_protocol; if (attrs.nha_group) error = newnhg(ctl, &attrs, unhop); else error = newnhop(&attrs, unhop, npt); if (error != 0) { free(unhop, M_NETLINK); return (error); } UN_WLOCK(ctl); /* Check if uidx already exists */ struct user_nhop *tmp = NULL; CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, unhop, tmp); if (tmp != NULL) { UN_WUNLOCK(ctl); NL_LOG(LOG_DEBUG, "nhop idx %u already exists", attrs.nha_id); destroy_unhop(unhop); return (EEXIST); } CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, unhop); uint32_t num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->un_head); UN_WUNLOCK(ctl); /* Report addition of the next nexhop */ struct netlink_walkargs wa = { .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags, .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, }; if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP, false)) { NL_LOG(LOG_DEBUG, "error allocating message writer"); return (ENOMEM); } dump_unhop(unhop, &wa.hdr, &nw); nlmsg_flush(&nw); consider_resize(ctl, num_buckets_new); return (0); } static int rtnl_handle_delnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); int error; if (__predict_false(ctl == NULL)) return (ESRCH); struct nl_parsed_nhop attrs = {}; error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); if (error != 0) return (error); if (attrs.nha_id == 0) { NL_LOG(LOG_DEBUG, "NHA_ID not set"); return (EINVAL); } error = delete_unhop(ctl, hdr, attrs.nha_id); return (error); } static bool match_unhop(const struct nl_parsed_nhop *attrs, struct user_nhop *unhop) { if (attrs->nha_id != 0 && unhop->un_idx != attrs->nha_id) return (false); if (attrs->nha_groups != 0 && 
unhop->un_nhgrp_src == NULL) return (false); if (attrs->nha_oif != NULL && (unhop->un_nhop_src == NULL || unhop->un_nhop_src->nh_ifp != attrs->nha_oif)) return (false); return (true); } static int rtnl_handle_getnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct user_nhop *unhop; UN_TRACKER; int error; struct nl_parsed_nhop attrs = {}; error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); if (error != 0) return (error); struct netlink_walkargs wa = { .nw = npt->nw, .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags, .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, }; if (attrs.nha_id != 0) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); struct user_nhop key = { .un_idx = attrs.nha_id }; if (__predict_false(ctl == NULL)) return (ESRCH); NL_LOG(LOG_DEBUG2, "searching for uidx %u", attrs.nha_id); UN_RLOCK(ctl); CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); UN_RUNLOCK(ctl); if (unhop == NULL) return (ESRCH); dump_unhop(unhop, &wa.hdr, wa.nw); return (0); } else if (attrs.nhaf_kid != 0) { struct nhop_iter iter = { .fibnum = attrs.nhaf_table, .family = attrs.nhaf_family, }; int error = ESRCH; NL_LOG(LOG_DEBUG2, "START table %u family %d", attrs.nhaf_table, attrs.nhaf_family); for (struct nhop_object *nh = nhops_iter_start(&iter); nh; nh = nhops_iter_next(&iter)) { NL_LOG(LOG_DEBUG3, "get %u", nhop_get_idx(nh)); if (nhop_get_idx(nh) == attrs.nhaf_kid) { dump_nhop(nh, 0, &wa.hdr, wa.nw); error = 0; break; } } nhops_iter_stop(&iter); return (error); } else if (attrs.nhaf_knhops) { struct nhop_iter iter = { .fibnum = attrs.nhaf_table, .family = attrs.nhaf_family, }; NL_LOG(LOG_DEBUG2, "DUMP table %u family %d", attrs.nhaf_table, attrs.nhaf_family); wa.hdr.nlmsg_flags |= NLM_F_MULTI; for (struct nhop_object *nh = nhops_iter_start(&iter); nh; nh = nhops_iter_next(&iter)) { dump_nhop(nh, 0, &wa.hdr, wa.nw); } nhops_iter_stop(&iter); } else { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 
if (__predict_false(ctl == NULL)) return (ESRCH); NL_LOG(LOG_DEBUG2, "DUMP unhops"); UN_RLOCK(ctl); wa.hdr.nlmsg_flags |= NLM_F_MULTI; CHT_SLIST_FOREACH(&ctl->un_head, unhop, unhop) { if (UNHOP_IS_MASTER(unhop) && match_unhop(&attrs, unhop)) dump_unhop(unhop, &wa.hdr, wa.nw); } CHT_SLIST_FOREACH_END; UN_RUNLOCK(ctl); } if (wa.error == 0) { if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) return (ENOMEM); } return (0); } static const struct rtnl_cmd_handler cmd_handlers[] = { { .cmd = NL_RTM_NEWNEXTHOP, .name = "RTM_NEWNEXTHOP", .cb = &rtnl_handle_newnhop, .priv = PRIV_NET_ROUTE, }, { .cmd = NL_RTM_DELNEXTHOP, .name = "RTM_DELNEXTHOP", .cb = &rtnl_handle_delnhop, .priv = PRIV_NET_ROUTE, }, { .cmd = NL_RTM_GETNEXTHOP, .name = "RTM_GETNEXTHOP", .cb = &rtnl_handle_getnhop, } }; static const struct nlhdr_parser *all_parsers[] = { &nhmsg_parser, &nh_fbsd_parser }; void rtnl_nexthops_init(void) { NL_VERIFY_PARSERS(all_parsers); - rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); + rtnl_register_messages(cmd_handlers, nitems(cmd_handlers)); } diff --git a/sys/netlink/route/rt.c b/sys/netlink/route/rt.c index c9b0f56498ec..14bd73d33411 100644 --- a/sys/netlink/route/rt.c +++ b/sys/netlink/route/rt.c @@ -1,1123 +1,1123 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_route #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); static unsigned char get_rtm_type(const struct nhop_object *nh) { int nh_flags = nh->nh_flags; /* Use the fact that nhg runtime flags are only NHF_MULTIPATH */ if (nh_flags & NHF_BLACKHOLE) return (RTN_BLACKHOLE); else if (nh_flags & NHF_REJECT) return (RTN_PROHIBIT); return (RTN_UNICAST); } static uint8_t nl_get_rtm_protocol(const struct nhop_object *nh) { #ifdef ROUTE_MPATH if (NH_IS_NHGRP(nh)) { const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh; uint8_t origin = nhgrp_get_origin(nhg); if (origin != RTPROT_UNSPEC) return (origin); nh = nhg->nhops[0]; } #endif uint8_t origin = nhop_get_origin(nh); if (origin != RTPROT_UNSPEC) return (origin); /* TODO: remove guesswork once all kernel users fill in origin */ int rt_flags = nhop_get_rtflags(nh); if (rt_flags & RTF_PROTO1) return (RTPROT_ZEBRA); if (rt_flags & RTF_STATIC) return (RTPROT_STATIC); return (RTPROT_KERNEL); } static int 
get_rtmsg_type_from_rtsock(int cmd) { switch (cmd) { case RTM_ADD: case RTM_CHANGE: case RTM_GET: return NL_RTM_NEWROUTE; case RTM_DELETE: return NL_RTM_DELROUTE; } return (0); } /* * fibnum heuristics * * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS * msg rtm_table RTA_TABLE result * RTM_GETROUTE/dump 0 - RT_ALL_FIBS * RTM_GETROUTE/dump 1 - 1 * RTM_GETROUTE/get 0 - 0 * */ static struct nhop_object * rc_get_nhop(const struct rib_cmd_info *rc) { return ((rc->rc_cmd == RTM_DELETE) ? rc->rc_nh_old : rc->rc_nh_new); } static void dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh) { #ifdef INET6 int upper_family; #endif switch (nhop_get_neigh_family(nh)) { case AF_LINK: /* onlink prefix, skip */ break; case AF_INET: nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr); break; #ifdef INET6 case AF_INET6: upper_family = nhop_get_upper_family(nh); if (upper_family == AF_INET6) { struct in6_addr gw6 = nh->gw6_sa.sin6_addr; in6_clearscope(&gw6); nlattr_add(nw, NL_RTA_GATEWAY, 16, &gw6); } else if (upper_family == AF_INET) { /* IPv4 over IPv6 */ struct in6_addr gw6 = nh->gw6_sa.sin6_addr; in6_clearscope(&gw6); char buf[20]; struct rtvia *via = (struct rtvia *)&buf[0]; via->rtvia_family = AF_INET6; memcpy(via->rtvia_addr, &gw6, 16); nlattr_add(nw, NL_RTA_VIA, 17, via); } break; #endif } } static void dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh) { int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t); struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); if (nla == NULL) return; nla->nla_type = NL_RTA_METRICS; nla->nla_len = nla_len; nla++; nla->nla_type = NL_RTAX_MTU; nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t); *((uint32_t *)(nla + 1)) = nh->nh_mtu; } #ifdef ROUTE_MPATH static void dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm) { uint32_t uidx = nhgrp_get_uidx(nhg); uint32_t num_nhops; const struct weightened_nhop *wn = nhgrp_get_nhops(nhg, &num_nhops); 
uint32_t base_rtflags = nhop_get_rtflags(wn[0].nh); if (uidx != 0) nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg)); nlattr_add_u32(nw, NL_RTA_RTFLAGS, base_rtflags); int off = nlattr_add_nested(nw, NL_RTA_MULTIPATH); if (off == 0) return; for (int i = 0; i < num_nhops; i++) { int nh_off = nlattr_save_offset(nw); struct rtnexthop *rtnh = nlmsg_reserve_object(nw, struct rtnexthop); if (rtnh == NULL) return; rtnh->rtnh_flags = 0; rtnh->rtnh_ifindex = if_getindex(wn[i].nh->nh_ifp); rtnh->rtnh_hops = wn[i].weight; dump_rc_nhop_gw(nw, wn[i].nh); uint32_t rtflags = nhop_get_rtflags(wn[i].nh); if (rtflags != base_rtflags) nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); if (rtflags & RTF_FIXEDMTU) dump_rc_nhop_mtu(nw, wn[i].nh); rtnh = nlattr_restore_offset(nw, nh_off, struct rtnexthop); /* * nlattr_add() allocates 4-byte aligned storage, no need to aligh * length here * */ rtnh->rtnh_len = nlattr_save_offset(nw) - nh_off; } nlattr_set_len(nw, off); } #endif static void dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtmsg *rtm) { #ifdef ROUTE_MPATH if (NH_IS_NHGRP(rnd->rnd_nhop)) { dump_rc_nhg(nw, rnd->rnd_nhgrp, rtm); return; } #endif const struct nhop_object *nh = rnd->rnd_nhop; uint32_t rtflags = nhop_get_rtflags(nh); /* * IPv4 over IPv6 * ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2), * IPv4 w/ gw * ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)], * Direct route: * ('RTA_OIF', 2) */ if (nh->nh_flags & NHF_GATEWAY) dump_rc_nhop_gw(nw, nh); uint32_t uidx = nhop_get_uidx(nh); if (uidx != 0) nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh)); nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); if (rtflags & RTF_FIXEDMTU) dump_rc_nhop_mtu(nw, nh); uint32_t nh_expire = nhop_get_expire(nh); if (nh_expire > 0) nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime); /* In any case, fill outgoing interface */ nlattr_add_u32(nw, 
NL_RTA_OIF, if_getindex(nh->nh_ifp)); if (rnd->rnd_weight != RT_DEFAULT_WEIGHT) nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight); } /* * Dumps output from a rib command into an rtmsg */ static int dump_px(uint32_t fibnum, const struct nlmsghdr *hdr, const struct rtentry *rt, struct route_nhop_data *rnd, struct nl_writer *nw) { struct rtmsg *rtm; int error = 0; NET_EPOCH_ASSERT(); if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg))) goto enomem; int family = rt_get_family(rt); int rtm_off = nlattr_save_offset(nw); rtm = nlmsg_reserve_object(nw, struct rtmsg); rtm->rtm_family = family; rtm->rtm_dst_len = 0; rtm->rtm_src_len = 0; rtm->rtm_tos = 0; if (fibnum < 255) rtm->rtm_table = (unsigned char)fibnum; rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop); rtm->rtm_type = get_rtm_type(rnd->rnd_nhop); nlattr_add_u32(nw, NL_RTA_TABLE, fibnum); int plen = 0; #if defined(INET) || defined(INET6) uint32_t scopeid; #endif switch (family) { #ifdef INET case AF_INET: { struct in_addr addr; rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); nlattr_add(nw, NL_RTA_DST, 4, &addr); break; } #endif #ifdef INET6 case AF_INET6: { struct in6_addr addr; rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); nlattr_add(nw, NL_RTA_DST, 16, &addr); break; } #endif default: FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family); error = EAFNOSUPPORT; goto flush; } rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg); if (plen > 0) rtm->rtm_dst_len = plen; dump_rc_nhop(nw, rnd, rtm); if (nlmsg_end(nw)) return (0); enomem: error = ENOMEM; flush: nlmsg_abort(nw); return (error); } static int family_to_group(int family) { switch (family) { case AF_INET: return (RTNLGRP_IPV4_ROUTE); case AF_INET6: return (RTNLGRP_IPV6_ROUTE); } return (0); } static void report_operation(uint32_t fibnum, struct rib_cmd_info *rc, struct nlpcb *nlp, struct nlmsghdr *hdr) { struct nl_writer nw; uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt)); if 
(nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id, false)) { struct route_nhop_data rnd = { .rnd_nhop = rc_get_nhop(rc), .rnd_weight = rc->rc_nh_weight, }; hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE); hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND); switch (rc->rc_cmd) { case RTM_ADD: hdr->nlmsg_type = NL_RTM_NEWROUTE; hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; break; case RTM_CHANGE: hdr->nlmsg_type = NL_RTM_NEWROUTE; hdr->nlmsg_flags |= NLM_F_REPLACE; break; case RTM_DELETE: hdr->nlmsg_type = NL_RTM_DELROUTE; break; } dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw); nlmsg_flush(&nw); } rtsock_callback_p->route_f(fibnum, rc); } static void set_scope6(struct sockaddr *sa, struct ifnet *ifp) { #ifdef INET6 if (sa != NULL && sa->sa_family == AF_INET6 && ifp != NULL) { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp)); } #endif } struct rta_mpath_nh { struct sockaddr *gw; struct ifnet *ifp; uint8_t rtnh_flags; uint8_t rtnh_weight; }; #define _IN(_field) offsetof(struct rtnexthop, _field) #define _OUT(_field) offsetof(struct rta_mpath_nh, _field) const static struct nlattr_parser nla_p_rtnh[] = { { .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip }, { .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia }, }; const static struct nlfield_parser nlf_p_rtnh[] = { { .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 }, { .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 }, { .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz }, }; #undef _IN #undef _OUT static bool post_p_rtnh(void *_attrs, struct nl_pstate *npt __unused) { struct rta_mpath_nh *attrs = (struct rta_mpath_nh *)_attrs; set_scope6(attrs->gw, attrs->ifp); return (true); } NL_DECLARE_PARSER_EXT(mpath_parser, struct rtnexthop, NULL, nlf_p_rtnh, nla_p_rtnh, post_p_rtnh); struct rta_mpath { int num_nhops; struct 
rta_mpath_nh nhops[0]; }; static int nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) { int data_len = nla->nla_len - sizeof(struct nlattr); struct rtnexthop *rtnh; int max_nhops = data_len / sizeof(struct rtnexthop); struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh)); mp->num_nhops = 0; for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) { struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++]; int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser, npt, mpnh); if (error != 0) { NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed", mp->num_nhops - 1); return (error); } int len = NL_ITEM_ALIGN(rtnh->rtnh_len); data_len -= len; rtnh = (struct rtnexthop *)((char *)rtnh + len); } if (data_len != 0 || mp->num_nhops == 0) { NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr"); return (EINVAL); } *((struct rta_mpath **)target) = mp; return (0); } struct nl_parsed_route { struct sockaddr *rta_dst; struct sockaddr *rta_gw; struct ifnet *rta_oif; struct rta_mpath *rta_multipath; uint32_t rta_table; uint32_t rta_rtflags; uint32_t rta_nh_id; uint32_t rta_weight; uint32_t rtax_mtu; uint8_t rtm_table; uint8_t rtm_family; uint8_t rtm_dst_len; uint8_t rtm_protocol; uint8_t rtm_type; uint32_t rtm_flags; }; #define _IN(_field) offsetof(struct rtmsg, _field) #define _OUT(_field) offsetof(struct nl_parsed_route, _field) static struct nlattr_parser nla_p_rtmetrics[] = { { .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 }, }; NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics); static const struct nlattr_parser nla_p_rtmsg[] = { { .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip }, { .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp }, { .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip }, { .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested }, { .type = NL_RTA_MULTIPATH, .off = 
_OUT(rta_multipath), .cb = nlattr_get_multipath }, { .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 }, { .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 }, { .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 }, { .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia }, { .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 }, }; static const struct nlfield_parser nlf_p_rtmsg[] = { { .off_in = _IN(rtm_family), .off_out = _OUT(rtm_family), .cb = nlf_get_u8 }, { .off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 }, { .off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 }, { .off_in = _IN(rtm_type), .off_out = _OUT(rtm_type), .cb = nlf_get_u8 }, { .off_in = _IN(rtm_table), .off_out = _OUT(rtm_table), .cb = nlf_get_u8 }, { .off_in = _IN(rtm_flags), .off_out = _OUT(rtm_flags), .cb = nlf_get_u32 }, }; #undef _IN #undef _OUT static bool post_p_rtmsg(void *_attrs, struct nl_pstate *npt __unused) { struct nl_parsed_route *attrs = (struct nl_parsed_route *)_attrs; set_scope6(attrs->rta_dst, attrs->rta_oif); set_scope6(attrs->rta_gw, attrs->rta_oif); return (true); } NL_DECLARE_PARSER_EXT(rtm_parser, struct rtmsg, NULL, nlf_p_rtmsg, nla_p_rtmsg, post_p_rtmsg); struct netlink_walkargs { struct nl_writer *nw; struct route_nhop_data rnd; struct nlmsghdr hdr; struct nlpcb *nlp; uint32_t fibnum; int family; int error; int count; int dumped; int dumped_tables; }; static int dump_rtentry(struct rtentry *rt, void *_arg) { struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg; int error; wa->count++; if (wa->error != 0) return (0); if (!rt_is_exportable(rt, nlp_get_cred(wa->nlp))) return (0); wa->dumped++; rt_get_rnd(rt, &wa->rnd); error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw); IF_DEBUG_LEVEL(LOG_DEBUG3) { char rtbuf[INET6_ADDRSTRLEN + 5]; FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family, "Dump %s, error %d", rt_print_buf(rt, 
rtbuf, sizeof(rtbuf)), error); } wa->error = error; return (0); } static void dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family) { FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump"); wa->count = 0; wa->dumped = 0; rib_walk(fibnum, family, false, dump_rtentry, wa); wa->dumped_tables++; FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d", wa->count, wa->dumped); } static int dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family) { wa->fibnum = fibnum; if (family == AF_UNSPEC) { for (int i = 0; i < AF_MAX; i++) { if (rt_tables_get_rnh(fibnum, i) != 0) { wa->family = i; dump_rtable_one(wa, fibnum, i); if (wa->error != 0) break; } } } else { if (rt_tables_get_rnh(fibnum, family) != 0) { wa->family = family; dump_rtable_one(wa, fibnum, family); } } return (wa->error); } static int handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs, struct nlmsghdr *hdr, struct nl_pstate *npt) { RIB_RLOCK_TRACKER; struct rib_head *rnh; const struct rtentry *rt; struct route_nhop_data rnd; uint32_t fibnum = attrs->rta_table; sa_family_t family = attrs->rtm_family; if (attrs->rta_dst == NULL) { NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied"); return (EINVAL); } rnh = rt_tables_get_rnh(fibnum, family); if (rnh == NULL) return (EAFNOSUPPORT); RIB_RLOCK(rnh); struct sockaddr *dst = attrs->rta_dst; if (attrs->rtm_flags & RTM_F_PREFIX) rt = rib_lookup_prefix_plen(rnh, dst, attrs->rtm_dst_len, &rnd); else rt = (const struct rtentry *)rnh->rnh_matchaddr(dst, &rnh->head); if (rt == NULL) { RIB_RUNLOCK(rnh); return (ESRCH); } rt_get_rnd(rt, &rnd); rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0); RIB_RUNLOCK(rnh); if (!rt_is_exportable(rt, nlp_get_cred(nlp))) return (ESRCH); IF_DEBUG_LEVEL(LOG_DEBUG2) { char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused; FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s", nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)), rt_print_buf(rt, 
rtbuf, sizeof(rtbuf))); } hdr->nlmsg_type = NL_RTM_NEWROUTE; dump_px(fibnum, hdr, rt, &rnd, npt->nw); return (0); } static int handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family, struct nlmsghdr *hdr, struct nl_writer *nw) { struct netlink_walkargs wa = { .nlp = nlp, .nw = nw, .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_type = NL_RTM_NEWROUTE, .hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI, }; if (fibnum == RT_TABLE_UNSPEC) { for (int i = 0; i < V_rt_numfibs; i++) { dump_rtable_fib(&wa, fibnum, family); if (wa.error != 0) break; } } else dump_rtable_fib(&wa, fibnum, family); if (wa.error == 0 && wa.dumped_tables == 0) { FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family"); wa.error = ESRCH; // How do we propagate it? } if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (wa.error); } static struct nhop_object * finalize_nhop(struct nhop_object *nh, const struct sockaddr *dst, int *perror) { /* * The following MUST be filled: * nh_ifp, nh_ifa, nh_gw */ if (nh->gw_sa.sa_family == 0) { /* * Empty gateway. Can be direct route with RTA_OIF set. 
*/ if (nh->nh_ifp != NULL) nhop_set_direct_gw(nh, nh->nh_ifp); else { NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping"); *perror = EINVAL; return (NULL); } /* Both nh_ifp and gateway are set */ } else { /* Gateway is set up, we can derive ifp if not set */ if (nh->nh_ifp == NULL) { uint32_t fibnum = nhop_get_fibnum(nh); uint32_t flags = 0; if (nh->nh_flags & NHF_GATEWAY) flags = RTF_GATEWAY; else if (nh->nh_flags & NHF_HOST) flags = RTF_HOST; struct ifaddr *ifa = ifa_ifwithroute(flags, dst, &nh->gw_sa, fibnum); if (ifa == NULL) { NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping"); *perror = EINVAL; return (NULL); } nhop_set_transmit_ifp(nh, ifa->ifa_ifp); } } /* Both nh_ifp and gateway are set */ if (nh->nh_ifa == NULL) { const struct sockaddr *gw_sa = &nh->gw_sa; if (gw_sa->sa_family != dst->sa_family) { /* * Use dst as the target for determining the default * preferred ifa IF * 1) the gateway is link-level (e.g. direct route) * 2) the gateway family is different (e.g. IPv4 over IPv6). */ gw_sa = dst; } struct ifaddr *ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp); if (ifa == NULL) { /* Try link-level ifa. */ gw_sa = &nh->gw_sa; ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp); if (ifa == NULL) { NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping"); *perror = EINVAL; return (NULL); } } nhop_set_src(nh, ifa); } return (nhop_get_nhop(nh, perror)); } static int get_pxflag(const struct nl_parsed_route *attrs) { int pxflag = 0; switch (attrs->rtm_family) { case AF_INET: if (attrs->rtm_dst_len == 32) pxflag = NHF_HOST; else if (attrs->rtm_dst_len == 0) pxflag = NHF_DEFAULT; break; case AF_INET6: if (attrs->rtm_dst_len == 128) pxflag = NHF_HOST; else if (attrs->rtm_dst_len == 0) pxflag = NHF_DEFAULT; break; } return (pxflag); } static int get_op_flags(int nlm_flags) { int op_flags = 0; op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0; op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0; op_flags |= (nlm_flags & NLM_F_CREATE) ? 
RTM_F_CREATE : 0; op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0; return (op_flags); } #ifdef ROUTE_MPATH static int create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh, struct nl_pstate *npt, struct nhop_object **pnh) { int error; if (mpnh->gw == NULL) return (EINVAL); struct nhop_object *nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); if (nh == NULL) return (ENOMEM); error = nl_set_nexthop_gw(nh, mpnh->gw, mpnh->ifp, npt); if (error != 0) { nhop_free(nh); return (error); } if (mpnh->ifp != NULL) nhop_set_transmit_ifp(nh, mpnh->ifp); nhop_set_pxtype_flag(nh, get_pxflag(attrs)); nhop_set_rtflags(nh, attrs->rta_rtflags); if (attrs->rtm_protocol > RTPROT_STATIC) nhop_set_origin(nh, attrs->rtm_protocol); *pnh = finalize_nhop(nh, attrs->rta_dst, &error); return (error); } #endif static struct nhop_object * create_nexthop_from_attrs(struct nl_parsed_route *attrs, struct nl_pstate *npt, int *perror) { struct nhop_object *nh = NULL; int error = 0; if (attrs->rta_multipath != NULL) { #ifdef ROUTE_MPATH /* Multipath w/o explicit nexthops */ int num_nhops = attrs->rta_multipath->num_nhops; struct weightened_nhop *wn = npt_alloc(npt, sizeof(*wn) * num_nhops); for (int i = 0; i < num_nhops; i++) { struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i]; error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh); if (error != 0) { for (int j = 0; j < i; j++) nhop_free(wn[j].nh); break; } wn[i].weight = mpnh->rtnh_weight > 0 ? 
mpnh->rtnh_weight : 1; } if (error == 0) { struct rib_head *rh = nhop_get_rh(wn[0].nh); struct nhgrp_object *nhg; nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family, wn, num_nhops, perror); if (nhg != NULL) { if (attrs->rtm_protocol > RTPROT_STATIC) nhgrp_set_origin(nhg, attrs->rtm_protocol); nhg = nhgrp_get_nhgrp(nhg, perror); } for (int i = 0; i < num_nhops; i++) nhop_free(wn[i].nh); if (nhg != NULL) return ((struct nhop_object *)nhg); error = *perror; } #else error = ENOTSUP; #endif *perror = error; } else { nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); if (nh == NULL) { *perror = ENOMEM; return (NULL); } if (attrs->rta_gw != NULL) { *perror = nl_set_nexthop_gw(nh, attrs->rta_gw, attrs->rta_oif, npt); if (*perror != 0) { nhop_free(nh); return (NULL); } } if (attrs->rta_oif != NULL) nhop_set_transmit_ifp(nh, attrs->rta_oif); if (attrs->rtax_mtu != 0) nhop_set_mtu(nh, attrs->rtax_mtu, true); if (attrs->rta_rtflags & RTF_BROADCAST) nhop_set_broadcast(nh, true); if (attrs->rtm_protocol > RTPROT_STATIC) nhop_set_origin(nh, attrs->rtm_protocol); nhop_set_pxtype_flag(nh, get_pxflag(attrs)); nhop_set_rtflags(nh, attrs->rta_rtflags); switch (attrs->rtm_type) { case RTN_UNICAST: break; case RTN_BLACKHOLE: nhop_set_blackhole(nh, RTF_BLACKHOLE); break; case RTN_PROHIBIT: case RTN_UNREACHABLE: nhop_set_blackhole(nh, RTF_REJECT); break; /* TODO: return ENOTSUP for other types if strict option is set */ } nh = finalize_nhop(nh, attrs->rta_dst, perror); } return (nh); } static int rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct rib_cmd_info rc = {}; struct nhop_object *nh = NULL; int error; struct nl_parsed_route attrs = {}; error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); if (error != 0) return (error); /* Check if we have enough data */ if (attrs.rta_dst == NULL) { NL_LOG(LOG_DEBUG, "missing RTA_DST"); return (EINVAL); } if (attrs.rtm_table > 0 && attrs.rta_table == 0) { /* pre-2.6.19 Linux API compatibility */ 
attrs.rta_table = attrs.rtm_table; } else if (attrs.rta_table >= V_rt_numfibs) { NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); return (EINVAL); } if (attrs.rta_nh_id != 0) { /* Referenced uindex */ int pxflag = get_pxflag(&attrs); nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id, pxflag, &error); if (error != 0) return (error); } else { nh = create_nexthop_from_attrs(&attrs, npt, &error); if (error != 0) { NL_LOG(LOG_DEBUG, "Error creating nexthop"); return (error); } } if (!NH_IS_NHGRP(nh) && attrs.rta_weight == 0) attrs.rta_weight = RT_DEFAULT_WEIGHT; struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = attrs.rta_weight }; int op_flags = get_op_flags(hdr->nlmsg_flags); error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len, &rnd, op_flags, &rc); if (error == 0) report_operation(attrs.rta_table, &rc, nlp, hdr); return (error); } static int path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data) { struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data; if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw)) return (0); if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp)) return (0); return (1); } static int rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct rib_cmd_info rc; int error; struct nl_parsed_route attrs = {}; error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); if (error != 0) return (error); if (attrs.rta_dst == NULL) { NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set"); return (ESRCH); } if (attrs.rta_table >= V_rt_numfibs) { NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); return (EINVAL); } error = rib_del_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len, path_match_func, &attrs, 0, &rc); if (error == 0) report_operation(attrs.rta_table, &rc, nlp, hdr); return (error); } static int rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { int error; struct nl_parsed_route 
attrs = {}; error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); if (error != 0) return (error); if (attrs.rta_table >= V_rt_numfibs) { NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); return (EINVAL); } if (hdr->nlmsg_flags & NLM_F_DUMP) error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw); else error = handle_rtm_getroute(nlp, &attrs, hdr, npt); return (error); } void rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) { struct nl_writer nw; int family, nlm_flags = 0; family = rt_get_family(rc->rc_rt); /* XXX: check if there are active listeners first */ /* TODO: consider passing PID/type/seq */ switch (rc->rc_cmd) { case RTM_ADD: nlm_flags = NLM_F_EXCL | NLM_F_CREATE; break; case RTM_CHANGE: nlm_flags = NLM_F_REPLACE; break; case RTM_DELETE: nlm_flags = 0; break; } IF_DEBUG_LEVEL(LOG_DEBUG2) { char rtbuf[NHOP_PRINT_BUFSIZE] __unused; FIB_LOG(LOG_DEBUG2, fibnum, family, "received event %s for %s / nlm_flags=%X", rib_print_cmd(rc->rc_cmd), rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)), nlm_flags); } struct nlmsghdr hdr = { .nlmsg_flags = nlm_flags, .nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd), }; struct route_nhop_data rnd = { .rnd_nhop = rc_get_nhop(rc), .rnd_weight = rc->rc_nh_weight, }; uint32_t group_id = family_to_group(family); if (!nl_writer_group(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id, false)) { NL_LOG(LOG_DEBUG, "error allocating event buffer"); return; } dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw); nlmsg_flush(&nw); } static const struct rtnl_cmd_handler cmd_handlers[] = { { .cmd = NL_RTM_GETROUTE, .name = "RTM_GETROUTE", .cb = &rtnl_handle_getroute, .flags = RTNL_F_ALLOW_NONVNET_JAIL, }, { .cmd = NL_RTM_DELROUTE, .name = "RTM_DELROUTE", .cb = &rtnl_handle_delroute, .priv = PRIV_NET_ROUTE, }, { .cmd = NL_RTM_NEWROUTE, .name = "RTM_NEWROUTE", .cb = &rtnl_handle_newroute, .priv = PRIV_NET_ROUTE, } }; static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser}; void 
rtnl_routes_init(void) { NL_VERIFY_PARSERS(all_parsers); - rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); + rtnl_register_messages(cmd_handlers, nitems(cmd_handlers)); } diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index 68b3659b0ed2..ad7dc97cbc1a 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -1,2014 +1,2014 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2023 Alexander V. Chernikov * Copyright (c) 2023 Rubicon Communications, LLC (Netgate) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_pf #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_DEBUG); struct nl_parsed_state { uint8_t version; uint32_t id; uint32_t creatorid; char ifname[IFNAMSIZ]; uint16_t proto; sa_family_t af; struct pf_addr addr; struct pf_addr mask; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct nl_parsed_state, _field) static const struct nlattr_parser nla_p_state[] = { { .type = PF_ST_ID, .off = _OUT(id), .cb = nlattr_get_uint32 }, { .type = PF_ST_CREATORID, .off = _OUT(creatorid), .cb = nlattr_get_uint32 }, { .type = PF_ST_IFNAME, .arg = (const void *)IFNAMSIZ, .off = _OUT(ifname), .cb = nlattr_get_chara }, { .type = PF_ST_AF, .off = _OUT(af), .cb = nlattr_get_uint8 }, { .type = PF_ST_PROTO, .off = _OUT(proto), .cb = nlattr_get_uint16 }, { .type = PF_ST_FILTER_ADDR, .off = _OUT(addr), .cb = nlattr_get_in6_addr }, { .type = PF_ST_FILTER_MASK, .off = _OUT(mask), .cb = nlattr_get_in6_addr }, }; static const struct nlfield_parser nlf_p_generic[] = { { .off_in = _IN(version), .off_out = _OUT(version), .cb = nlf_get_u8 }, }; #undef _IN #undef _OUT NL_DECLARE_PARSER(state_parser, struct genlmsghdr, nlf_p_generic, nla_p_state); static void dump_addr(struct nl_writer *nw, int attr, const struct pf_addr *addr, int af) { switch (af) { case AF_INET: nlattr_add(nw, attr, 4, &addr->v4); break; case AF_INET6: nlattr_add(nw, attr, 16, &addr->v6); break; }; } static bool dump_state_peer(struct nl_writer *nw, int attr, const struct pf_state_peer *peer) { int off = nlattr_add_nested(nw, attr); if (off == 0) return (false); nlattr_add_u32(nw, PF_STP_SEQLO, peer->seqlo); nlattr_add_u32(nw, PF_STP_SEQHI, peer->seqhi); nlattr_add_u32(nw, PF_STP_SEQDIFF, peer->seqdiff); nlattr_add_u16(nw, PF_STP_MAX_WIN, peer->max_win); nlattr_add_u16(nw, PF_STP_MSS, 
peer->mss); nlattr_add_u8(nw, PF_STP_STATE, peer->state); nlattr_add_u8(nw, PF_STP_WSCALE, peer->wscale); if (peer->scrub != NULL) { struct pf_state_scrub *sc = peer->scrub; uint16_t pfss_flags = sc->pfss_flags & PFSS_TIMESTAMP; nlattr_add_u16(nw, PF_STP_PFSS_FLAGS, pfss_flags); nlattr_add_u32(nw, PF_STP_PFSS_TS_MOD, sc->pfss_ts_mod); nlattr_add_u8(nw, PF_STP_PFSS_TTL, sc->pfss_ttl); nlattr_add_u8(nw, PF_STP_SCRUB_FLAG, PFSYNC_SCRUB_FLAG_VALID); } nlattr_set_len(nw, off); return (true); } static bool dump_state_key(struct nl_writer *nw, int attr, const struct pf_state_key *key) { int off = nlattr_add_nested(nw, attr); if (off == 0) return (false); dump_addr(nw, PF_STK_ADDR0, &key->addr[0], key->af); dump_addr(nw, PF_STK_ADDR1, &key->addr[1], key->af); nlattr_add_u16(nw, PF_STK_PORT0, key->port[0]); nlattr_add_u16(nw, PF_STK_PORT1, key->port[1]); nlattr_set_len(nw, off); return (true); } static int dump_state(struct nlpcb *nlp, const struct nlmsghdr *hdr, struct pf_kstate *s, struct nl_pstate *npt) { struct nl_writer *nw = npt->nw; int error = 0; int af; struct pf_state_key *key; PF_STATE_LOCK_ASSERT(s); if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) goto enomem; struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_GETSTATES; ghdr_new->version = 0; ghdr_new->reserved = 0; nlattr_add_u64(nw, PF_ST_VERSION, PF_STATE_VERSION); key = s->key[PF_SK_WIRE]; if (!dump_state_key(nw, PF_ST_KEY_WIRE, key)) goto enomem; key = s->key[PF_SK_STACK]; if (!dump_state_key(nw, PF_ST_KEY_STACK, key)) goto enomem; af = s->key[PF_SK_WIRE]->af; nlattr_add_u8(nw, PF_ST_PROTO, s->key[PF_SK_WIRE]->proto); nlattr_add_u8(nw, PF_ST_AF, af); nlattr_add_string(nw, PF_ST_IFNAME, s->kif->pfik_name); nlattr_add_string(nw, PF_ST_ORIG_IFNAME, s->orig_kif->pfik_name); dump_addr(nw, PF_ST_RT_ADDR, &s->act.rt_addr, af); nlattr_add_u32(nw, PF_ST_CREATION, time_uptime - (s->creation / 1000)); uint32_t expire = pf_state_expires(s); if (expire > 
time_uptime) expire = expire - time_uptime; nlattr_add_u32(nw, PF_ST_EXPIRE, expire); nlattr_add_u8(nw, PF_ST_DIRECTION, s->direction); nlattr_add_u8(nw, PF_ST_LOG, s->act.log); nlattr_add_u8(nw, PF_ST_TIMEOUT, s->timeout); nlattr_add_u16(nw, PF_ST_STATE_FLAGS, s->state_flags); uint8_t sync_flags = 0; if (s->src_node) sync_flags |= PFSYNC_FLAG_SRCNODE; if (s->nat_src_node) sync_flags |= PFSYNC_FLAG_NATSRCNODE; nlattr_add_u8(nw, PF_ST_SYNC_FLAGS, sync_flags); nlattr_add_u64(nw, PF_ST_ID, s->id); nlattr_add_u32(nw, PF_ST_CREATORID, htonl(s->creatorid)); nlattr_add_u32(nw, PF_ST_RULE, s->rule ? s->rule->nr : -1); nlattr_add_u32(nw, PF_ST_ANCHOR, s->anchor ? s->anchor->nr : -1); nlattr_add_u32(nw, PF_ST_NAT_RULE, s->nat_rule ? s->nat_rule->nr : -1); nlattr_add_u64(nw, PF_ST_PACKETS0, s->packets[0]); nlattr_add_u64(nw, PF_ST_PACKETS1, s->packets[1]); nlattr_add_u64(nw, PF_ST_BYTES0, s->bytes[0]); nlattr_add_u64(nw, PF_ST_BYTES1, s->bytes[1]); nlattr_add_u32(nw, PF_ST_RTABLEID, s->act.rtableid); nlattr_add_u8(nw, PF_ST_MIN_TTL, s->act.min_ttl); nlattr_add_u16(nw, PF_ST_MAX_MSS, s->act.max_mss); nlattr_add_u16(nw, PF_ST_DNPIPE, s->act.dnpipe); nlattr_add_u16(nw, PF_ST_DNRPIPE, s->act.dnrpipe); nlattr_add_u8(nw, PF_ST_RT, s->act.rt); if (s->act.rt_kif != NULL) nlattr_add_string(nw, PF_ST_RT_IFNAME, s->act.rt_kif->pfik_name); if (!dump_state_peer(nw, PF_ST_PEER_SRC, &s->src)) goto enomem; if (!dump_state_peer(nw, PF_ST_PEER_DST, &s->dst)) goto enomem; if (nlmsg_end(nw)) return (0); enomem: error = ENOMEM; nlmsg_abort(nw); return (error); } static int handle_dumpstates(struct nlpcb *nlp, struct nl_parsed_state *attrs, struct nlmsghdr *hdr, struct nl_pstate *npt) { int error = 0; hdr->nlmsg_flags |= NLM_F_MULTI; for (int i = 0; i <= V_pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_kstate *s; if (LIST_EMPTY(&ih->states)) continue; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { sa_family_t af = s->key[PF_SK_WIRE]->af; if (s->timeout == 
PFTM_UNLINKED) continue; /* Filter */ if (attrs->creatorid != 0 && s->creatorid != attrs->creatorid) continue; if (attrs->ifname[0] != 0 && strncmp(attrs->ifname, s->kif->pfik_name, IFNAMSIZ) != 0) continue; if (attrs->proto != 0 && s->key[PF_SK_WIRE]->proto != attrs->proto) continue; if (attrs->af != 0 && af != attrs->af) continue; if (pf_match_addr(1, &s->key[PF_SK_WIRE]->addr[0], &attrs->mask, &attrs->addr, af) && pf_match_addr(1, &s->key[PF_SK_WIRE]->addr[1], &attrs->mask, &attrs->addr, af) && pf_match_addr(1, &s->key[PF_SK_STACK]->addr[0], &attrs->mask, &attrs->addr, af) && pf_match_addr(1, &s->key[PF_SK_STACK]->addr[1], &attrs->mask, &attrs->addr, af)) continue; error = dump_state(nlp, hdr, s, npt); if (error != 0) break; } PF_HASHROW_UNLOCK(ih); } if (!nlmsg_end_dump(npt->nw, error, hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (error); } static int handle_getstate(struct nlpcb *nlp, struct nl_parsed_state *attrs, struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pf_kstate *s; int ret; s = pf_find_state_byid(attrs->id, attrs->creatorid); if (s == NULL) return (ENOENT); ret = dump_state(nlp, hdr, s, npt); PF_STATE_UNLOCK(s); return (ret); } static int dump_creatorid(struct nlpcb *nlp, const struct nlmsghdr *hdr, uint32_t creator, struct nl_pstate *npt) { struct nl_writer *nw = npt->nw; if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) goto enomem; struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_GETCREATORS; ghdr_new->version = 0; ghdr_new->reserved = 0; nlattr_add_u32(nw, PF_ST_CREATORID, htonl(creator)); if (nlmsg_end(nw)) return (0); enomem: nlmsg_abort(nw); return (ENOMEM); } static int pf_handle_getstates(struct nlmsghdr *hdr, struct nl_pstate *npt) { int error; struct nl_parsed_state attrs = {}; error = nl_parse_nlmsg(hdr, &state_parser, npt, &attrs); if (error != 0) return (error); if (attrs.id != 0) error = handle_getstate(npt->nlp, &attrs, hdr, npt); else 
error = handle_dumpstates(npt->nlp, &attrs, hdr, npt); return (error); } static int pf_handle_getcreators(struct nlmsghdr *hdr, struct nl_pstate *npt) { uint32_t creators[16]; int error = 0; bzero(creators, sizeof(creators)); for (int i = 0; i < V_pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_kstate *s; if (LIST_EMPTY(&ih->states)) continue; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { int j; if (s->timeout == PFTM_UNLINKED) continue; for (j = 0; j < nitems(creators); j++) { if (creators[j] == s->creatorid) break; if (creators[j] == 0) { creators[j] = s->creatorid; break; } } if (j == nitems(creators)) printf("Warning: too many creators!\n"); } PF_HASHROW_UNLOCK(ih); } hdr->nlmsg_flags |= NLM_F_MULTI; for (int i = 0; i < nitems(creators); i++) { if (creators[i] == 0) break; error = dump_creatorid(npt->nlp, hdr, creators[i], npt); } if (!nlmsg_end_dump(npt->nw, error, hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (error); } static int pf_handle_start(struct nlmsghdr *hdr __unused, struct nl_pstate *npt __unused) { return (pf_start()); } static int pf_handle_stop(struct nlmsghdr *hdr __unused, struct nl_pstate *npt __unused) { return (pf_stop()); } #define _OUT(_field) offsetof(struct pf_addr_wrap, _field) static const struct nlattr_parser nla_p_addr_wrap[] = { { .type = PF_AT_ADDR, .off = _OUT(v.a.addr), .cb = nlattr_get_in6_addr }, { .type = PF_AT_MASK, .off = _OUT(v.a.mask), .cb = nlattr_get_in6_addr }, { .type = PF_AT_IFNAME, .off = _OUT(v.ifname), .arg = (void *)IFNAMSIZ,.cb = nlattr_get_chara }, { .type = PF_AT_TABLENAME, .off = _OUT(v.tblname), .arg = (void *)PF_TABLE_NAME_SIZE, .cb = nlattr_get_chara }, { .type = PF_AT_TYPE, .off = _OUT(type), .cb = nlattr_get_uint8 }, { .type = PF_AT_IFLAGS, .off = _OUT(iflags), .cb = nlattr_get_uint8 }, }; NL_DECLARE_ATTR_PARSER(addr_wrap_parser, nla_p_addr_wrap); #undef _OUT static bool nlattr_add_addr_wrap(struct nl_writer *nw, int attrtype, 
struct pf_addr_wrap *a) { int off = nlattr_add_nested(nw, attrtype); nlattr_add_in6_addr(nw, PF_AT_ADDR, &a->v.a.addr.v6); nlattr_add_in6_addr(nw, PF_AT_MASK, &a->v.a.mask.v6); nlattr_add_u8(nw, PF_AT_TYPE, a->type); nlattr_add_u8(nw, PF_AT_IFLAGS, a->iflags); if (a->type == PF_ADDR_DYNIFTL) { nlattr_add_string(nw, PF_AT_IFNAME, a->v.ifname); nlattr_add_u32(nw, PF_AT_DYNCNT, a->p.dyncnt); } else if (a->type == PF_ADDR_TABLE) { nlattr_add_string(nw, PF_AT_TABLENAME, a->v.tblname); nlattr_add_u32(nw, PF_AT_TBLCNT, a->p.tblcnt); } nlattr_set_len(nw, off); return (true); } #define _OUT(_field) offsetof(struct pf_rule_addr, _field) static const struct nlattr_parser nla_p_ruleaddr[] = { { .type = PF_RAT_ADDR, .off = _OUT(addr), .arg = &addr_wrap_parser, .cb = nlattr_get_nested }, { .type = PF_RAT_SRC_PORT, .off = _OUT(port[0]), .cb = nlattr_get_uint16 }, { .type = PF_RAT_DST_PORT, .off = _OUT(port[1]), .cb = nlattr_get_uint16 }, { .type = PF_RAT_NEG, .off = _OUT(neg), .cb = nlattr_get_uint8 }, { .type = PF_RAT_OP, .off = _OUT(port_op), .cb = nlattr_get_uint8 }, }; NL_DECLARE_ATTR_PARSER(rule_addr_parser, nla_p_ruleaddr); #undef _OUT static bool nlattr_add_rule_addr(struct nl_writer *nw, int attrtype, struct pf_rule_addr *r) { struct pf_addr_wrap aw = {0}; int off = nlattr_add_nested(nw, attrtype); bcopy(&(r->addr), &aw, sizeof(struct pf_addr_wrap)); pf_addr_copyout(&aw); nlattr_add_addr_wrap(nw, PF_RAT_ADDR, &aw); nlattr_add_u16(nw, PF_RAT_SRC_PORT, r->port[0]); nlattr_add_u16(nw, PF_RAT_DST_PORT, r->port[1]); nlattr_add_u8(nw, PF_RAT_NEG, r->neg); nlattr_add_u8(nw, PF_RAT_OP, r->port_op); nlattr_set_len(nw, off); return (true); } #define _OUT(_field) offsetof(struct pf_mape_portset, _field) static const struct nlattr_parser nla_p_mape_portset[] = { { .type = PF_MET_OFFSET, .off = _OUT(offset), .cb = nlattr_get_uint8 }, { .type = PF_MET_PSID_LEN, .off = _OUT(psidlen), .cb = nlattr_get_uint8 }, {. 
type = PF_MET_PSID, .off = _OUT(psid), .cb = nlattr_get_uint16 }, }; NL_DECLARE_ATTR_PARSER(mape_portset_parser, nla_p_mape_portset); #undef _OUT static bool nlattr_add_mape_portset(struct nl_writer *nw, int attrtype, const struct pf_mape_portset *m) { int off = nlattr_add_nested(nw, attrtype); nlattr_add_u8(nw, PF_MET_OFFSET, m->offset); nlattr_add_u8(nw, PF_MET_PSID_LEN, m->psidlen); nlattr_add_u16(nw, PF_MET_PSID, m->psid); nlattr_set_len(nw, off); return (true); } struct nl_parsed_labels { char labels[PF_RULE_MAX_LABEL_COUNT][PF_RULE_LABEL_SIZE]; uint32_t i; }; static int nlattr_get_pf_rule_labels(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) { struct nl_parsed_labels *l = (struct nl_parsed_labels *)target; int ret; if (l->i >= PF_RULE_MAX_LABEL_COUNT) return (E2BIG); ret = nlattr_get_chara(nla, npt, (void *)PF_RULE_LABEL_SIZE, l->labels[l->i]); if (ret == 0) l->i++; return (ret); } #define _OUT(_field) offsetof(struct nl_parsed_labels, _field) static const struct nlattr_parser nla_p_labels[] = { { .type = PF_LT_LABEL, .off = 0, .cb = nlattr_get_pf_rule_labels }, }; NL_DECLARE_ATTR_PARSER(rule_labels_parser, nla_p_labels); #undef _OUT static int nlattr_get_nested_pf_rule_labels(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) { struct nl_parsed_labels parsed_labels = { }; int error; /* Assumes target points to the beginning of the structure */ error = nl_parse_header(NLA_DATA(nla), NLA_DATA_LEN(nla), &rule_labels_parser, npt, &parsed_labels); if (error != 0) return (error); memcpy(target, parsed_labels.labels, sizeof(parsed_labels.labels)); return (0); } static bool nlattr_add_labels(struct nl_writer *nw, int attrtype, const struct pf_krule *r) { int off = nlattr_add_nested(nw, attrtype); int i = 0; while (r->label[i][0] != 0 && i < PF_RULE_MAX_LABEL_COUNT) { nlattr_add_string(nw, PF_LT_LABEL, r->label[i]); i++; } nlattr_set_len(nw, off); return (true); } #define _OUT(_field) offsetof(struct pf_kpool, _field) 
static const struct nlattr_parser nla_p_pool[] = { { .type = PF_PT_KEY, .off = _OUT(key), .arg = (void *)sizeof(struct pf_poolhashkey), .cb = nlattr_get_bytes }, { .type = PF_PT_COUNTER, .off = _OUT(counter), .cb = nlattr_get_in6_addr }, { .type = PF_PT_TBLIDX, .off = _OUT(tblidx), .cb = nlattr_get_uint32 }, { .type = PF_PT_PROXY_SRC_PORT, .off = _OUT(proxy_port[0]), .cb = nlattr_get_uint16 }, { .type = PF_PT_PROXY_DST_PORT, .off = _OUT(proxy_port[1]), .cb = nlattr_get_uint16 }, { .type = PF_PT_OPTS, .off = _OUT(opts), .cb = nlattr_get_uint8 }, { .type = PF_PT_MAPE, .off = _OUT(mape), .arg = &mape_portset_parser, .cb = nlattr_get_nested }, }; NL_DECLARE_ATTR_PARSER(pool_parser, nla_p_pool); #undef _OUT static bool nlattr_add_pool(struct nl_writer *nw, int attrtype, const struct pf_kpool *pool) { int off = nlattr_add_nested(nw, attrtype); nlattr_add(nw, PF_PT_KEY, sizeof(struct pf_poolhashkey), &pool->key); nlattr_add_in6_addr(nw, PF_PT_COUNTER, (const struct in6_addr *)&pool->counter); nlattr_add_u32(nw, PF_PT_TBLIDX, pool->tblidx); nlattr_add_u16(nw, PF_PT_PROXY_SRC_PORT, pool->proxy_port[0]); nlattr_add_u16(nw, PF_PT_PROXY_DST_PORT, pool->proxy_port[1]); nlattr_add_u8(nw, PF_PT_OPTS, pool->opts); nlattr_add_mape_portset(nw, PF_PT_MAPE, &pool->mape); nlattr_set_len(nw, off); return (true); } #define _OUT(_field) offsetof(struct pf_rule_uid, _field) static const struct nlattr_parser nla_p_rule_uid[] = { { .type = PF_RUT_UID_LOW, .off = _OUT(uid[0]), .cb = nlattr_get_uint32 }, { .type = PF_RUT_UID_HIGH, .off = _OUT(uid[1]), .cb = nlattr_get_uint32 }, { .type = PF_RUT_OP, .off = _OUT(op), .cb = nlattr_get_uint8 }, }; NL_DECLARE_ATTR_PARSER(rule_uid_parser, nla_p_rule_uid); #undef _OUT static bool nlattr_add_rule_uid(struct nl_writer *nw, int attrtype, const struct pf_rule_uid *u) { int off = nlattr_add_nested(nw, attrtype); nlattr_add_u32(nw, PF_RUT_UID_LOW, u->uid[0]); nlattr_add_u32(nw, PF_RUT_UID_HIGH, u->uid[1]); nlattr_add_u8(nw, PF_RUT_OP, u->op); 
nlattr_set_len(nw, off); return (true); } struct nl_parsed_timeouts { uint32_t timeouts[PFTM_MAX]; uint32_t i; }; static int nlattr_get_pf_timeout(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) { struct nl_parsed_timeouts *t = (struct nl_parsed_timeouts *)target; int ret; if (t->i >= PFTM_MAX) return (E2BIG); ret = nlattr_get_uint32(nla, npt, NULL, &t->timeouts[t->i]); if (ret == 0) t->i++; return (ret); } #define _OUT(_field) offsetof(struct nl_parsed_timeout, _field) static const struct nlattr_parser nla_p_timeouts[] = { { .type = PF_TT_TIMEOUT, .off = 0, .cb = nlattr_get_pf_timeout }, }; NL_DECLARE_ATTR_PARSER(timeout_parser, nla_p_timeouts); #undef _OUT static int nlattr_get_nested_timeouts(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) { struct nl_parsed_timeouts parsed_timeouts = { }; int error; /* Assumes target points to the beginning of the structure */ error = nl_parse_header(NLA_DATA(nla), NLA_DATA_LEN(nla), &timeout_parser, npt, &parsed_timeouts); if (error != 0) return (error); memcpy(target, parsed_timeouts.timeouts, sizeof(parsed_timeouts.timeouts)); return (0); } static bool nlattr_add_timeout(struct nl_writer *nw, int attrtype, uint32_t *timeout) { int off = nlattr_add_nested(nw, attrtype); for (int i = 0; i < PFTM_MAX; i++) nlattr_add_u32(nw, PF_RT_TIMEOUT, timeout[i]); nlattr_set_len(nw, off); return (true); } #define _OUT(_field) offsetof(struct pf_krule, _field) static const struct nlattr_parser nla_p_rule[] = { { .type = PF_RT_SRC, .off = _OUT(src), .arg = &rule_addr_parser,.cb = nlattr_get_nested }, { .type = PF_RT_DST, .off = _OUT(dst), .arg = &rule_addr_parser,.cb = nlattr_get_nested }, { .type = PF_RT_RIDENTIFIER, .off = _OUT(ridentifier), .cb = nlattr_get_uint32 }, { .type = PF_RT_LABELS, .off = _OUT(label), .arg = &rule_labels_parser,.cb = nlattr_get_nested_pf_rule_labels }, { .type = PF_RT_IFNAME, .off = _OUT(ifname), .arg = (void *)IFNAMSIZ, .cb = nlattr_get_chara }, { .type = 
PF_RT_QNAME, .off = _OUT(qname), .arg = (void *)PF_QNAME_SIZE, .cb = nlattr_get_chara }, { .type = PF_RT_PQNAME, .off = _OUT(pqname), .arg = (void *)PF_QNAME_SIZE, .cb = nlattr_get_chara }, { .type = PF_RT_TAGNAME, .off = _OUT(tagname), .arg = (void *)PF_TAG_NAME_SIZE, .cb = nlattr_get_chara }, { .type = PF_RT_MATCH_TAGNAME, .off = _OUT(match_tagname), .arg = (void *)PF_TAG_NAME_SIZE, .cb = nlattr_get_chara }, { .type = PF_RT_OVERLOAD_TBLNAME, .off = _OUT(overload_tblname), .arg = (void *)PF_TABLE_NAME_SIZE, .cb = nlattr_get_chara }, { .type = PF_RT_RPOOL, .off = _OUT(rpool), .arg = &pool_parser, .cb = nlattr_get_nested }, { .type = PF_RT_OS_FINGERPRINT, .off = _OUT(os_fingerprint), .cb = nlattr_get_uint32 }, { .type = PF_RT_RTABLEID, .off = _OUT(rtableid), .cb = nlattr_get_uint32 }, { .type = PF_RT_TIMEOUT, .off = _OUT(timeout), .arg = &timeout_parser, .cb = nlattr_get_nested_timeouts }, { .type = PF_RT_MAX_STATES, .off = _OUT(max_states), .cb = nlattr_get_uint32 }, { .type = PF_RT_MAX_SRC_NODES, .off = _OUT(max_src_nodes), .cb = nlattr_get_uint32 }, { .type = PF_RT_MAX_SRC_STATES, .off = _OUT(max_src_states), .cb = nlattr_get_uint32 }, { .type = PF_RT_MAX_SRC_CONN_RATE_LIMIT, .off = _OUT(max_src_conn_rate.limit), .cb = nlattr_get_uint32 }, { .type = PF_RT_MAX_SRC_CONN_RATE_SECS, .off = _OUT(max_src_conn_rate.seconds), .cb = nlattr_get_uint32 }, { .type = PF_RT_DNPIPE, .off = _OUT(dnpipe), .cb = nlattr_get_uint16 }, { .type = PF_RT_DNRPIPE, .off = _OUT(dnrpipe), .cb = nlattr_get_uint16 }, { .type = PF_RT_DNFLAGS, .off = _OUT(free_flags), .cb = nlattr_get_uint32 }, { .type = PF_RT_NR, .off = _OUT(nr), .cb = nlattr_get_uint32 }, { .type = PF_RT_PROB, .off = _OUT(prob), .cb = nlattr_get_uint32 }, { .type = PF_RT_CUID, .off = _OUT(cuid), .cb = nlattr_get_uint32 }, {. 
type = PF_RT_CPID, .off = _OUT(cpid), .cb = nlattr_get_uint32 }, { .type = PF_RT_RETURN_ICMP, .off = _OUT(return_icmp), .cb = nlattr_get_uint16 }, { .type = PF_RT_RETURN_ICMP6, .off = _OUT(return_icmp6), .cb = nlattr_get_uint16 }, { .type = PF_RT_MAX_MSS, .off = _OUT(max_mss), .cb = nlattr_get_uint16 }, { .type = PF_RT_SCRUB_FLAGS, .off = _OUT(scrub_flags), .cb = nlattr_get_uint16 }, { .type = PF_RT_UID, .off = _OUT(uid), .arg = &rule_uid_parser, .cb = nlattr_get_nested }, { .type = PF_RT_GID, .off = _OUT(gid), .arg = &rule_uid_parser, .cb = nlattr_get_nested }, { .type = PF_RT_RULE_FLAG, .off = _OUT(rule_flag), .cb = nlattr_get_uint32 }, { .type = PF_RT_ACTION, .off = _OUT(action), .cb = nlattr_get_uint8 }, { .type = PF_RT_DIRECTION, .off = _OUT(direction), .cb = nlattr_get_uint8 }, { .type = PF_RT_LOG, .off = _OUT(log), .cb = nlattr_get_uint8 }, { .type = PF_RT_LOGIF, .off = _OUT(logif), .cb = nlattr_get_uint8 }, { .type = PF_RT_QUICK, .off = _OUT(quick), .cb = nlattr_get_uint8 }, { .type = PF_RT_IF_NOT, .off = _OUT(ifnot), .cb = nlattr_get_uint8 }, { .type = PF_RT_MATCH_TAG_NOT, .off = _OUT(match_tag_not), .cb = nlattr_get_uint8 }, { .type = PF_RT_NATPASS, .off = _OUT(natpass), .cb = nlattr_get_uint8 }, { .type = PF_RT_KEEP_STATE, .off = _OUT(keep_state), .cb = nlattr_get_uint8 }, { .type = PF_RT_AF, .off = _OUT(af), .cb = nlattr_get_uint8 }, { .type = PF_RT_PROTO, .off = _OUT(proto), .cb = nlattr_get_uint8 }, { .type = PF_RT_TYPE, .off = _OUT(type), .cb = nlattr_get_uint8 }, { .type = PF_RT_CODE, .off = _OUT(code), .cb = nlattr_get_uint8 }, { .type = PF_RT_FLAGS, .off = _OUT(flags), .cb = nlattr_get_uint8 }, { .type = PF_RT_FLAGSET, .off = _OUT(flagset), .cb = nlattr_get_uint8 }, { .type = PF_RT_MIN_TTL, .off = _OUT(min_ttl), .cb = nlattr_get_uint8 }, { .type = PF_RT_ALLOW_OPTS, .off = _OUT(allow_opts), .cb = nlattr_get_uint8 }, { .type = PF_RT_RT, .off = _OUT(rt), .cb = nlattr_get_uint8 }, { .type = PF_RT_RETURN_TTL, .off = _OUT(return_ttl), .cb = 
nlattr_get_uint8 },
	{ .type = PF_RT_TOS, .off = _OUT(tos), .cb = nlattr_get_uint8 },
	{ .type = PF_RT_SET_TOS, .off = _OUT(set_tos), .cb = nlattr_get_uint8 },
	{ .type = PF_RT_ANCHOR_RELATIVE, .off = _OUT(anchor_relative), .cb = nlattr_get_uint8 },
	{ .type = PF_RT_ANCHOR_WILDCARD, .off = _OUT(anchor_wildcard), .cb = nlattr_get_uint8 },
	{ .type = PF_RT_FLUSH, .off = _OUT(flush), .cb = nlattr_get_uint8 },
	{ .type = PF_RT_PRIO, .off = _OUT(prio), .cb = nlattr_get_uint8 },
	{ .type = PF_RT_SET_PRIO, .off = _OUT(set_prio[0]), .cb = nlattr_get_uint8 },
	{ .type = PF_RT_SET_PRIO_REPLY, .off = _OUT(set_prio[1]), .cb = nlattr_get_uint8 },
	{ .type = PF_RT_DIVERT_ADDRESS, .off = _OUT(divert.addr), .cb = nlattr_get_in6_addr },
	{ .type = PF_RT_DIVERT_PORT, .off = _OUT(divert.port), .cb = nlattr_get_uint16 },
	{ .type = PF_RT_RCV_IFNAME, .off = _OUT(rcv_ifname), .arg = (void *)IFNAMSIZ, .cb = nlattr_get_chara },
	{ .type = PF_RT_MAX_SRC_CONN, .off = _OUT(max_src_conn), .cb = nlattr_get_uint32 },
};
NL_DECLARE_ATTR_PARSER(rule_parser, nla_p_rule);
#undef _OUT

/* Parsed form of an ADDRULE request. */
struct nl_parsed_addrule {
	struct pf_krule *rule;		/* filled through rule_parser */
	uint32_t ticket;
	uint32_t pool_ticket;
	char *anchor;
	char *anchor_call;
};
#define _IN(_field) offsetof(struct genlmsghdr, _field)
#define _OUT(_field) offsetof(struct nl_parsed_addrule, _field)
static const struct nlattr_parser nla_p_addrule[] = {
	{ .type = PF_ART_TICKET, .off = _OUT(ticket), .cb = nlattr_get_uint32 },
	{ .type = PF_ART_POOL_TICKET, .off = _OUT(pool_ticket), .cb = nlattr_get_uint32 },
	{ .type = PF_ART_ANCHOR, .off = _OUT(anchor), .cb = nlattr_get_string },
	{ .type = PF_ART_ANCHOR_CALL, .off = _OUT(anchor_call), .cb = nlattr_get_string },
	{ .type = PF_ART_RULE, .off = _OUT(rule), .arg = &rule_parser, .cb = nlattr_get_nested_ptr }
};
/* No fixed header fields are parsed for this command. */
static const struct nlfield_parser nlf_p_addrule[] = { };
#undef _IN
#undef _OUT
NL_DECLARE_PARSER(addrule_parser, struct genlmsghdr, nlf_p_addrule, nla_p_addrule);

/*
 * ADDRULE handler: allocate a rule, fill it from the request attributes and
 * pass it to pf_ioctl_addrule() together with the caller's uid and the
 * netlink message pid.
 *
 * Returns the parse error (after freeing the freshly allocated rule) or the
 * result of pf_ioctl_addrule().
 */
static int
pf_handle_addrule(struct nlmsghdr *hdr, struct nl_pstate *npt)
{
	int error;
	struct nl_parsed_addrule attrs = {};

	/* Allocate before parsing: rule_parser writes through attrs.rule. */
	attrs.rule = pf_krule_alloc();

	error = nl_parse_nlmsg(hdr, &addrule_parser, npt, &attrs);
	if (error != 0) {
		pf_free_rule(attrs.rule);
		return (error);
	}

	/*
	 * NOTE(review): the rule is not freed here when pf_ioctl_addrule()
	 * fails; presumably the callee takes ownership in all cases - confirm.
	 */
	error = pf_ioctl_addrule(attrs.rule, attrs.ticket, attrs.pool_ticket,
	    attrs.anchor, attrs.anchor_call, nlp_get_cred(npt->nlp)->cr_uid,
	    hdr->nlmsg_pid);

	return (error);
}

/* GETRULES request attributes are parsed straight into a struct pfioc_rule. */
#define _IN(_field) offsetof(struct genlmsghdr, _field)
#define _OUT(_field) offsetof(struct pfioc_rule, _field)
static const struct nlattr_parser nla_p_getrules[] = {
	{ .type = PF_GR_ANCHOR, .off = _OUT(anchor), .arg = (void *)MAXPATHLEN, .cb = nlattr_get_chara },
	{ .type = PF_GR_ACTION, .off = _OUT(rule.action), .cb = nlattr_get_uint8 },
};
static const struct nlfield_parser nlf_p_getrules[] = { };
#undef _IN
#undef _OUT
NL_DECLARE_PARSER(getrules_parser, struct genlmsghdr, nlf_p_getrules, nla_p_getrules);

/*
 * GETRULES handler: call pf_ioctl_getrules() for the requested anchor and
 * action and reply with the resulting 'nr' and 'ticket' values.
 *
 * Returns 0 on success, the parse or ioctl error, or ENOMEM when the reply
 * cannot be started or finished. Once the reply has been started, every
 * failure path aborts it.
 */
static int
pf_handle_getrules(struct nlmsghdr *hdr, struct nl_pstate *npt)
{
	struct pfioc_rule attrs = {};
	int error;
	struct nl_writer *nw = npt->nw;
	struct genlmsghdr *ghdr_new;

	error = nl_parse_nlmsg(hdr, &getrules_parser, npt, &attrs);
	if (error != 0)
		return (error);

	if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr)))
		return (ENOMEM);

	ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr);
	ghdr_new->cmd = PFNL_CMD_GETRULES;
	ghdr_new->version = 0;
	ghdr_new->reserved = 0;

	/* The reply header is already reserved, so failures must abort it. */
	error = pf_ioctl_getrules(&attrs);
	if (error != 0)
		goto out;

	nlattr_add_u32(nw, PF_GR_NR, attrs.nr);
	nlattr_add_u32(nw, PF_GR_TICKET, attrs.ticket);

	if (!nlmsg_end(nw)) {
		error = ENOMEM;
		goto out;
	}

	return (0);

out:
	nlmsg_abort(nw);
	return (error);
}

/* Parsed form of a GETRULE request. */
struct nl_parsed_get_rule {
	char anchor[MAXPATHLEN];
	uint8_t action;
	uint32_t nr;
	uint32_t ticket;
	uint8_t clear;		/* non-zero: clear the rule counters after reading */
};
#define _IN(_field) offsetof(struct genlmsghdr, _field)
#define _OUT(_field) offsetof(struct nl_parsed_get_rule, _field)
static const struct nlattr_parser nla_p_getrule[] = {
	{ .type = PF_GR_ANCHOR, .off = _OUT(anchor), .arg = (void *)MAXPATHLEN, .cb =
nlattr_get_chara }, { .type = PF_GR_ACTION, .off = _OUT(action), .cb = nlattr_get_uint8 }, { .type = PF_GR_NR, .off = _OUT(nr), .cb = nlattr_get_uint32 }, { .type = PF_GR_TICKET, .off = _OUT(ticket), .cb = nlattr_get_uint32 }, { .type = PF_GR_CLEAR, .off = _OUT(clear), .cb = nlattr_get_uint8 }, }; static const struct nlfield_parser nlf_p_getrule[] = { }; #undef _IN #undef _OUT NL_DECLARE_PARSER(getrule_parser, struct genlmsghdr, nlf_p_getrule, nla_p_getrule); static int pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt) { char anchor_call[MAXPATHLEN]; struct nl_parsed_get_rule attrs = {}; struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; struct pf_kruleset *ruleset; struct pf_krule *rule; int rs_num; int error; error = nl_parse_nlmsg(hdr, &getrule_parser, npt, &attrs); if (error != 0) return (error); if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (ENOMEM); ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_GETRULE; ghdr_new->version = 0; ghdr_new->reserved = 0; PF_RULES_WLOCK(); ruleset = pf_find_kruleset(attrs.anchor); if (ruleset == NULL) { PF_RULES_WUNLOCK(); error = ENOENT; goto out; } rs_num = pf_get_ruleset_number(attrs.action); if (rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); error = EINVAL; goto out; } if (attrs.ticket != ruleset->rules[rs_num].active.ticket) { PF_RULES_WUNLOCK(); error = EBUSY; goto out; } rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); while ((rule != NULL) && (rule->nr != attrs.nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { PF_RULES_WUNLOCK(); error = EBUSY; goto out; } nlattr_add_rule_addr(nw, PF_RT_SRC, &rule->src); nlattr_add_rule_addr(nw, PF_RT_DST, &rule->dst); nlattr_add_u32(nw, PF_RT_RIDENTIFIER, rule->ridentifier); nlattr_add_labels(nw, PF_RT_LABELS, rule); nlattr_add_string(nw, PF_RT_IFNAME, rule->ifname); nlattr_add_string(nw, PF_RT_QNAME, rule->qname); nlattr_add_string(nw, PF_RT_PQNAME, rule->pqname); nlattr_add_string(nw, PF_RT_TAGNAME, 
rule->tagname); nlattr_add_string(nw, PF_RT_MATCH_TAGNAME, rule->match_tagname); nlattr_add_string(nw, PF_RT_OVERLOAD_TBLNAME, rule->overload_tblname); nlattr_add_pool(nw, PF_RT_RPOOL, &rule->rpool); nlattr_add_u32(nw, PF_RT_OS_FINGERPRINT, rule->os_fingerprint); nlattr_add_u32(nw, PF_RT_RTABLEID, rule->rtableid); nlattr_add_timeout(nw, PF_RT_TIMEOUT, rule->timeout); nlattr_add_u32(nw, PF_RT_MAX_STATES, rule->max_states); nlattr_add_u32(nw, PF_RT_MAX_SRC_NODES, rule->max_src_nodes); nlattr_add_u32(nw, PF_RT_MAX_SRC_STATES, rule->max_src_states); nlattr_add_u32(nw, PF_RT_MAX_SRC_CONN, rule->max_src_conn); nlattr_add_u32(nw, PF_RT_MAX_SRC_CONN_RATE_LIMIT, rule->max_src_conn_rate.limit); nlattr_add_u32(nw, PF_RT_MAX_SRC_CONN_RATE_SECS, rule->max_src_conn_rate.seconds); nlattr_add_u16(nw, PF_RT_DNPIPE, rule->dnpipe); nlattr_add_u16(nw, PF_RT_DNRPIPE, rule->dnrpipe); nlattr_add_u32(nw, PF_RT_DNFLAGS, rule->free_flags); nlattr_add_u32(nw, PF_RT_NR, rule->nr); nlattr_add_u32(nw, PF_RT_PROB, rule->prob); nlattr_add_u32(nw, PF_RT_CUID, rule->cuid); nlattr_add_u32(nw, PF_RT_CPID, rule->cpid); nlattr_add_u16(nw, PF_RT_RETURN_ICMP, rule->return_icmp); nlattr_add_u16(nw, PF_RT_RETURN_ICMP6, rule->return_icmp6); nlattr_add_u16(nw, PF_RT_RETURN_ICMP6, rule->return_icmp6); nlattr_add_u16(nw, PF_RT_MAX_MSS, rule->max_mss); nlattr_add_u16(nw, PF_RT_SCRUB_FLAGS, rule->scrub_flags); nlattr_add_rule_uid(nw, PF_RT_UID, &rule->uid); nlattr_add_rule_uid(nw, PF_RT_GID, (const struct pf_rule_uid *)&rule->gid); nlattr_add_string(nw, PF_RT_RCV_IFNAME, rule->rcv_ifname); nlattr_add_u32(nw, PF_RT_RULE_FLAG, rule->rule_flag); nlattr_add_u8(nw, PF_RT_ACTION, rule->action); nlattr_add_u8(nw, PF_RT_DIRECTION, rule->direction); nlattr_add_u8(nw, PF_RT_LOG, rule->log); nlattr_add_u8(nw, PF_RT_LOGIF, rule->logif); nlattr_add_u8(nw, PF_RT_QUICK, rule->quick); nlattr_add_u8(nw, PF_RT_IF_NOT, rule->ifnot); nlattr_add_u8(nw, PF_RT_MATCH_TAG_NOT, rule->match_tag_not); nlattr_add_u8(nw, PF_RT_NATPASS, 
rule->natpass); nlattr_add_u8(nw, PF_RT_KEEP_STATE, rule->keep_state); nlattr_add_u8(nw, PF_RT_AF, rule->af); nlattr_add_u8(nw, PF_RT_PROTO, rule->proto); nlattr_add_u8(nw, PF_RT_TYPE, rule->type); nlattr_add_u8(nw, PF_RT_CODE, rule->code); nlattr_add_u8(nw, PF_RT_FLAGS, rule->flags); nlattr_add_u8(nw, PF_RT_FLAGSET, rule->flagset); nlattr_add_u8(nw, PF_RT_MIN_TTL, rule->min_ttl); nlattr_add_u8(nw, PF_RT_ALLOW_OPTS, rule->allow_opts); nlattr_add_u8(nw, PF_RT_RT, rule->rt); nlattr_add_u8(nw, PF_RT_RETURN_TTL, rule->return_ttl); nlattr_add_u8(nw, PF_RT_TOS, rule->tos); nlattr_add_u8(nw, PF_RT_SET_TOS, rule->set_tos); nlattr_add_u8(nw, PF_RT_ANCHOR_RELATIVE, rule->anchor_relative); nlattr_add_u8(nw, PF_RT_ANCHOR_WILDCARD, rule->anchor_wildcard); nlattr_add_u8(nw, PF_RT_FLUSH, rule->flush); nlattr_add_u8(nw, PF_RT_PRIO, rule->prio); nlattr_add_u8(nw, PF_RT_SET_PRIO, rule->set_prio[0]); nlattr_add_u8(nw, PF_RT_SET_PRIO_REPLY, rule->set_prio[1]); nlattr_add_in6_addr(nw, PF_RT_DIVERT_ADDRESS, &rule->divert.addr.v6); nlattr_add_u16(nw, PF_RT_DIVERT_PORT, rule->divert.port); nlattr_add_u64(nw, PF_RT_PACKETS_IN, pf_counter_u64_fetch(&rule->packets[0])); nlattr_add_u64(nw, PF_RT_PACKETS_OUT, pf_counter_u64_fetch(&rule->packets[1])); nlattr_add_u64(nw, PF_RT_BYTES_IN, pf_counter_u64_fetch(&rule->bytes[0])); nlattr_add_u64(nw, PF_RT_BYTES_OUT, pf_counter_u64_fetch(&rule->bytes[1])); nlattr_add_u64(nw, PF_RT_EVALUATIONS, pf_counter_u64_fetch(&rule->evaluations)); nlattr_add_u64(nw, PF_RT_TIMESTAMP, pf_get_timestamp(rule)); nlattr_add_u64(nw, PF_RT_STATES_CUR, counter_u64_fetch(rule->states_cur)); nlattr_add_u64(nw, PF_RT_STATES_TOTAL, counter_u64_fetch(rule->states_tot)); nlattr_add_u64(nw, PF_RT_SRC_NODES, counter_u64_fetch(rule->src_nodes)); error = pf_kanchor_copyout(ruleset, rule, anchor_call, sizeof(anchor_call)); MPASS(error == 0); nlattr_add_string(nw, PF_RT_ANCHOR_CALL, anchor_call); if (attrs.clear) pf_krule_clear_counters(rule); PF_RULES_WUNLOCK(); if (!nlmsg_end(nw)) 
{ error = ENOMEM; goto out; } return (0); out: nlmsg_abort(nw); return (error); } #define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct pf_kstate_kill, _field) static const struct nlattr_parser nla_p_clear_states[] = { { .type = PF_CS_CMP_ID, .off = _OUT(psk_pfcmp.id), .cb = nlattr_get_uint64 }, { .type = PF_CS_CMP_CREATORID, .off = _OUT(psk_pfcmp.creatorid), .cb = nlattr_get_uint32 }, { .type = PF_CS_CMP_DIR, .off = _OUT(psk_pfcmp.direction), .cb = nlattr_get_uint8 }, { .type = PF_CS_AF, .off = _OUT(psk_af), .cb = nlattr_get_uint8 }, { .type = PF_CS_PROTO, .off = _OUT(psk_proto), .cb = nlattr_get_uint8 }, { .type = PF_CS_SRC, .off = _OUT(psk_src), .arg = &rule_addr_parser, .cb = nlattr_get_nested }, { .type = PF_CS_DST, .off = _OUT(psk_dst), .arg = &rule_addr_parser, .cb = nlattr_get_nested }, { .type = PF_CS_RT_ADDR, .off = _OUT(psk_rt_addr), .arg = &rule_addr_parser, .cb = nlattr_get_nested }, { .type = PF_CS_IFNAME, .off = _OUT(psk_ifname), .arg = (void *)IFNAMSIZ, .cb = nlattr_get_chara }, { .type = PF_CS_LABEL, .off = _OUT(psk_label), .arg = (void *)PF_RULE_LABEL_SIZE, .cb = nlattr_get_chara }, { .type = PF_CS_KILL_MATCH, .off = _OUT(psk_kill_match), .cb = nlattr_get_bool }, { .type = PF_CS_NAT, .off = _OUT(psk_nat), .cb = nlattr_get_bool }, }; static const struct nlfield_parser nlf_p_clear_states[] = {}; #undef _IN #undef _OUT NL_DECLARE_PARSER(clear_states_parser, struct genlmsghdr, nlf_p_clear_states, nla_p_clear_states); static int pf_handle_killclear_states(struct nlmsghdr *hdr, struct nl_pstate *npt, int cmd) { struct pf_kstate_kill kill = {}; struct epoch_tracker et; struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; int error; unsigned int killed = 0; error = nl_parse_nlmsg(hdr, &clear_states_parser, npt, &kill); if (error != 0) return (error); if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (ENOMEM); ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = cmd; 
ghdr_new->version = 0; ghdr_new->reserved = 0; NET_EPOCH_ENTER(et); if (cmd == PFNL_CMD_KILLSTATES) pf_killstates(&kill, &killed); else killed = pf_clear_states(&kill); NET_EPOCH_EXIT(et); nlattr_add_u32(nw, PF_CS_KILLED, killed); if (! nlmsg_end(nw)) { error = ENOMEM; goto out; } return (0); out: nlmsg_abort(nw); return (error); } static int pf_handle_clear_states(struct nlmsghdr *hdr, struct nl_pstate *npt) { return (pf_handle_killclear_states(hdr, npt, PFNL_CMD_CLRSTATES)); } static int pf_handle_kill_states(struct nlmsghdr *hdr, struct nl_pstate *npt) { return (pf_handle_killclear_states(hdr, npt, PFNL_CMD_KILLSTATES)); } struct nl_parsed_set_statusif { char ifname[IFNAMSIZ]; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct nl_parsed_set_statusif, _field) static const struct nlattr_parser nla_p_set_statusif[] = { { .type = PF_SS_IFNAME, .off = _OUT(ifname), .arg = (const void *)IFNAMSIZ, .cb = nlattr_get_chara }, }; static const struct nlfield_parser nlf_p_set_statusif[] = {}; #undef _IN #undef _OUT NL_DECLARE_PARSER(set_statusif_parser, struct genlmsghdr, nlf_p_set_statusif, nla_p_set_statusif); static int pf_handle_set_statusif(struct nlmsghdr *hdr, struct nl_pstate *npt) { int error; struct nl_parsed_set_statusif attrs = {}; error = nl_parse_nlmsg(hdr, &set_statusif_parser, npt, &attrs); if (error != 0) return (error); PF_RULES_WLOCK(); strlcpy(V_pf_status.ifname, attrs.ifname, IFNAMSIZ); PF_RULES_WUNLOCK(); return (0); } static bool nlattr_add_counters(struct nl_writer *nw, int attr, size_t number, char **names, counter_u64_t *counters) { for (int i = 0; i < number; i++) { int off = nlattr_add_nested(nw, attr); nlattr_add_u32(nw, PF_C_ID, i); nlattr_add_string(nw, PF_C_NAME, names[i]); nlattr_add_u64(nw, PF_C_COUNTER, counter_u64_fetch(counters[i])); nlattr_set_len(nw, off); } return (true); } static bool nlattr_add_fcounters(struct nl_writer *nw, int attr, size_t number, char **names, struct pf_counter_u64 
*counters) { for (int i = 0; i < number; i++) { int off = nlattr_add_nested(nw, attr); nlattr_add_u32(nw, PF_C_ID, i); nlattr_add_string(nw, PF_C_NAME, names[i]); nlattr_add_u64(nw, PF_C_COUNTER, pf_counter_u64_fetch(&counters[i])); nlattr_set_len(nw, off); } return (true); } static bool nlattr_add_u64_array(struct nl_writer *nw, int attr, size_t number, uint64_t *array) { int off = nlattr_add_nested(nw, attr); for (size_t i = 0; i < number; i++) nlattr_add_u64(nw, 0, array[i]); nlattr_set_len(nw, off); return (true); } static int pf_handle_get_status(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pf_status s; struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; char *pf_reasons[PFRES_MAX+1] = PFRES_NAMES; char *pf_lcounter[KLCNT_MAX+1] = KLCNT_NAMES; char *pf_fcounter[FCNT_MAX+1] = FCNT_NAMES; int error; PF_RULES_RLOCK_TRACKER; if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (ENOMEM); ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_GET_STATUS; ghdr_new->version = 0; ghdr_new->reserved = 0; PF_RULES_RLOCK(); nlattr_add_string(nw, PF_GS_IFNAME, V_pf_status.ifname); nlattr_add_bool(nw, PF_GS_RUNNING, V_pf_status.running); nlattr_add_u32(nw, PF_GS_SINCE, V_pf_status.since); nlattr_add_u32(nw, PF_GS_DEBUG, V_pf_status.debug); nlattr_add_u32(nw, PF_GS_HOSTID, ntohl(V_pf_status.hostid)); nlattr_add_u32(nw, PF_GS_STATES, V_pf_status.states); nlattr_add_u32(nw, PF_GS_SRC_NODES, V_pf_status.src_nodes); nlattr_add_u32(nw, PF_GS_REASSEMBLE, V_pf_status.reass); nlattr_add_u32(nw, PF_GS_SYNCOOKIES_ACTIVE, V_pf_status.syncookies_active); nlattr_add_counters(nw, PF_GS_COUNTERS, PFRES_MAX, pf_reasons, V_pf_status.counters); nlattr_add_counters(nw, PF_GS_LCOUNTERS, KLCNT_MAX, pf_lcounter, V_pf_status.lcounters); nlattr_add_fcounters(nw, PF_GS_FCOUNTERS, FCNT_MAX, pf_fcounter, V_pf_status.fcounters); nlattr_add_counters(nw, PF_GS_SCOUNTERS, SCNT_MAX, pf_fcounter, V_pf_status.scounters); 
pfi_update_status(V_pf_status.ifname, &s); nlattr_add_u64_array(nw, PF_GS_BCOUNTERS, 2 * 2, (uint64_t *)s.bcounters); nlattr_add_u64_array(nw, PF_GS_PCOUNTERS, 2 * 2 * 2, (uint64_t *)s.pcounters); nlattr_add(nw, PF_GS_CHKSUM, PF_MD5_DIGEST_LENGTH, V_pf_status.pf_chksum); PF_RULES_RUNLOCK(); if (!nlmsg_end(nw)) { error = ENOMEM; goto out; } return (0); out: nlmsg_abort(nw); return (error); } static int pf_handle_clear_status(struct nlmsghdr *hdr, struct nl_pstate *npt) { pf_ioctl_clear_status(); return (0); } struct pf_nl_natlook { sa_family_t af; uint8_t direction; uint8_t proto; struct pf_addr src; struct pf_addr dst; uint16_t sport; uint16_t dport; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct pf_nl_natlook, _field) static const struct nlattr_parser nla_p_natlook[] = { { .type = PF_NL_AF, .off = _OUT(af), .cb = nlattr_get_uint8 }, { .type = PF_NL_DIRECTION, .off = _OUT(direction), .cb = nlattr_get_uint8 }, { .type = PF_NL_PROTO, .off = _OUT(proto), .cb = nlattr_get_uint8 }, { .type = PF_NL_SRC_ADDR, .off = _OUT(src), .cb = nlattr_get_in6_addr }, { .type = PF_NL_DST_ADDR, .off = _OUT(dst), .cb = nlattr_get_in6_addr }, { .type = PF_NL_SRC_PORT, .off = _OUT(sport), .cb = nlattr_get_uint16 }, { .type = PF_NL_DST_PORT, .off = _OUT(dport), .cb = nlattr_get_uint16 }, }; static const struct nlfield_parser nlf_p_natlook[] = {}; #undef _IN #undef _OUT NL_DECLARE_PARSER(natlook_parser, struct genlmsghdr, nlf_p_natlook, nla_p_natlook); static int pf_handle_natlook(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pf_nl_natlook attrs = {}; struct pf_state_key_cmp key = {}; struct nl_writer *nw = npt->nw; struct pf_state_key *sk; struct pf_kstate *state; struct genlmsghdr *ghdr_new; int error, m = 0; int sidx, didx; error = nl_parse_nlmsg(hdr, &natlook_parser, npt, &attrs); if (error != 0) return (error); if (attrs.proto == 0 || PF_AZERO(&attrs.src, attrs.af) || PF_AZERO(&attrs.dst, attrs.af) || ((attrs.proto == IPPROTO_TCP 
|| attrs.proto == IPPROTO_UDP) && (attrs.sport == 0 || attrs.dport == 0))) return (EINVAL); /* NATLOOK src and dst are reversed, so reverse sidx/didx */ sidx = (attrs.direction == PF_IN) ? 1 : 0; didx = (attrs.direction == PF_IN) ? 0 : 1; key.af = attrs.af; key.proto = attrs.proto; PF_ACPY(&key.addr[sidx], &attrs.src, attrs.af); key.port[sidx] = attrs.sport; PF_ACPY(&key.addr[didx], &attrs.dst, attrs.af); key.port[didx] = attrs.dport; state = pf_find_state_all(&key, attrs.direction, &m); if (state == NULL) return (ENOENT); if (m > 1) { PF_STATE_UNLOCK(state); return (E2BIG); } if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { PF_STATE_UNLOCK(state); return (ENOMEM); } ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_NATLOOK; ghdr_new->version = 0; ghdr_new->reserved = 0; sk = state->key[sidx]; nlattr_add_in6_addr(nw, PF_NL_SRC_ADDR, &sk->addr[sidx].v6); nlattr_add_in6_addr(nw, PF_NL_DST_ADDR, &sk->addr[didx].v6); nlattr_add_u16(nw, PF_NL_SRC_PORT, sk->port[sidx]); nlattr_add_u16(nw, PF_NL_DST_PORT, sk->port[didx]); PF_STATE_UNLOCK(state); if (!nlmsg_end(nw)) { nlmsg_abort(nw); return (ENOMEM); } return (0); } struct pf_nl_set_debug { uint32_t level; }; #define _OUT(_field) offsetof(struct pf_nl_set_debug, _field) static const struct nlattr_parser nla_p_set_debug[] = { { .type = PF_SD_LEVEL, .off = _OUT(level), .cb = nlattr_get_uint32 }, }; static const struct nlfield_parser nlf_p_set_debug[] = {}; #undef _OUT NL_DECLARE_PARSER(set_debug_parser, struct genlmsghdr, nlf_p_set_debug, nla_p_set_debug); static int pf_handle_set_debug(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pf_nl_set_debug attrs = {}; int error; error = nl_parse_nlmsg(hdr, &set_debug_parser, npt, &attrs); if (error != 0) return (error); PF_RULES_WLOCK(); V_pf_status.debug = attrs.level; PF_RULES_WUNLOCK(); return (0); } struct pf_nl_set_timeout { uint32_t timeout; uint32_t seconds; }; #define _OUT(_field) offsetof(struct pf_nl_set_timeout, _field) static 
const struct nlattr_parser nla_p_set_timeout[] = { { .type = PF_TO_TIMEOUT, .off = _OUT(timeout), .cb = nlattr_get_uint32 }, { .type = PF_TO_SECONDS, .off = _OUT(seconds), .cb = nlattr_get_uint32 }, }; static const struct nlfield_parser nlf_p_set_timeout[] = {}; #undef _OUT NL_DECLARE_PARSER(set_timeout_parser, struct genlmsghdr, nlf_p_set_timeout, nla_p_set_timeout); static int pf_handle_set_timeout(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pf_nl_set_timeout attrs = {}; int error; error = nl_parse_nlmsg(hdr, &set_timeout_parser, npt, &attrs); if (error != 0) return (error); return (pf_ioctl_set_timeout(attrs.timeout, attrs.seconds, NULL)); } static int pf_handle_get_timeout(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pf_nl_set_timeout attrs = {}; struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; int error; error = nl_parse_nlmsg(hdr, &set_timeout_parser, npt, &attrs); if (error != 0) return (error); error = pf_ioctl_get_timeout(attrs.timeout, &attrs.seconds); if (error != 0) return (error); if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (ENOMEM); ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_GET_TIMEOUT; ghdr_new->version = 0; ghdr_new->reserved = 0; nlattr_add_u32(nw, PF_TO_SECONDS, attrs.seconds); if (!nlmsg_end(nw)) { nlmsg_abort(nw); return (ENOMEM); } return (0); } struct pf_nl_set_limit { uint32_t index; uint32_t limit; }; #define _OUT(_field) offsetof(struct pf_nl_set_limit, _field) static const struct nlattr_parser nla_p_set_limit[] = { { .type = PF_LI_INDEX, .off = _OUT(index), .cb = nlattr_get_uint32 }, { .type = PF_LI_LIMIT, .off = _OUT(limit), .cb = nlattr_get_uint32 }, }; static const struct nlfield_parser nlf_p_set_limit[] = {}; #undef _OUT NL_DECLARE_PARSER(set_limit_parser, struct genlmsghdr, nlf_p_set_limit, nla_p_set_limit); static int pf_handle_set_limit(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pf_nl_set_limit attrs = {}; int error; error = 
nl_parse_nlmsg(hdr, &set_limit_parser, npt, &attrs); if (error != 0) return (error); return (pf_ioctl_set_limit(attrs.index, attrs.limit, NULL)); } static int pf_handle_get_limit(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pf_nl_set_limit attrs = {}; struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; int error; error = nl_parse_nlmsg(hdr, &set_limit_parser, npt, &attrs); if (error != 0) return (error); error = pf_ioctl_get_limit(attrs.index, &attrs.limit); if (error != 0) return (error); if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (ENOMEM); ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_GET_LIMIT; ghdr_new->version = 0; ghdr_new->reserved = 0; nlattr_add_u32(nw, PF_LI_LIMIT, attrs.limit); if (!nlmsg_end(nw)) { nlmsg_abort(nw); return (ENOMEM); } return (0); } static int pf_handle_begin_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; uint32_t ticket; int error; error = pf_ioctl_begin_addrs(&ticket); if (error != 0) return (error); if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (ENOMEM); ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_BEGIN_ADDRS; ghdr_new->version = 0; ghdr_new->reserved = 0; nlattr_add_u32(nw, PF_BA_TICKET, ticket); if (!nlmsg_end(nw)) { nlmsg_abort(nw); return (ENOMEM); } return (0); } static bool nlattr_add_pool_addr(struct nl_writer *nw, int attrtype, struct pf_pooladdr *a) { int off; off = nlattr_add_nested(nw, attrtype); nlattr_add_addr_wrap(nw, PF_PA_ADDR, &a->addr); nlattr_add_string(nw, PF_PA_IFNAME, a->ifname); nlattr_set_len(nw, off); return (true); } #define _OUT(_field) offsetof(struct pf_pooladdr, _field) static const struct nlattr_parser nla_p_pool_addr[] = { { .type = PF_PA_ADDR, .off = _OUT(addr), .arg = &addr_wrap_parser, .cb = nlattr_get_nested }, { .type = PF_PA_IFNAME, .off = _OUT(ifname), .arg = (void *)IFNAMSIZ, .cb = nlattr_get_chara }, }; 
NL_DECLARE_ATTR_PARSER(pool_addr_parser, nla_p_pool_addr); #undef _OUT #define _OUT(_field) offsetof(struct pfioc_pooladdr, _field) static const struct nlattr_parser nla_p_add_addr[] = { { .type = PF_AA_ACTION, .off = _OUT(action), .cb = nlattr_get_uint32 }, { .type = PF_AA_TICKET, .off = _OUT(ticket), .cb = nlattr_get_uint32 }, { .type = PF_AA_NR, .off = _OUT(nr), .cb = nlattr_get_uint32 }, { .type = PF_AA_R_NUM, .off = _OUT(r_num), .cb = nlattr_get_uint32 }, { .type = PF_AA_R_ACTION, .off = _OUT(r_action), .cb = nlattr_get_uint8 }, { .type = PF_AA_R_LAST, .off = _OUT(r_last), .cb = nlattr_get_uint8 }, { .type = PF_AA_AF, .off = _OUT(af), .cb = nlattr_get_uint8 }, { .type = PF_AA_ANCHOR, .off = _OUT(anchor), .arg = (void *)MAXPATHLEN, .cb = nlattr_get_chara }, { .type = PF_AA_ADDR, .off = _OUT(addr), .arg = &pool_addr_parser, .cb = nlattr_get_nested }, }; static const struct nlfield_parser nlf_p_add_addr[] = {}; #undef _OUT NL_DECLARE_PARSER(add_addr_parser, struct genlmsghdr, nlf_p_add_addr, nla_p_add_addr); static int pf_handle_add_addr(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pfioc_pooladdr attrs = { 0 }; int error; error = nl_parse_nlmsg(hdr, &add_addr_parser, npt, &attrs); if (error != 0) return (error); error = pf_ioctl_add_addr(&attrs); return (error); } static int pf_handle_get_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pfioc_pooladdr attrs = { 0 }; struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; int error; error = nl_parse_nlmsg(hdr, &add_addr_parser, npt, &attrs); if (error != 0) return (error); error = pf_ioctl_get_addrs(&attrs); if (error != 0) return (error); if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (ENOMEM); ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_GET_ADDRS; ghdr_new->version = 0; ghdr_new->reserved = 0; nlattr_add_u32(nw, PF_AA_NR, attrs.nr); if (!nlmsg_end(nw)) { nlmsg_abort(nw); return (ENOMEM); } return (error); } static int 
pf_handle_get_addr(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pfioc_pooladdr attrs = { 0 }; struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; int error; error = nl_parse_nlmsg(hdr, &add_addr_parser, npt, &attrs); if (error != 0) return (error); error = pf_ioctl_get_addr(&attrs); if (error != 0) return (error); if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (ENOMEM); ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_GET_ADDR; ghdr_new->version = 0; ghdr_new->reserved = 0; nlattr_add_u32(nw, PF_AA_ACTION, attrs.action); nlattr_add_u32(nw, PF_AA_TICKET, attrs.ticket); nlattr_add_u32(nw, PF_AA_NR, attrs.nr); nlattr_add_u32(nw, PF_AA_R_NUM, attrs.r_num); nlattr_add_u8(nw, PF_AA_R_ACTION, attrs.r_action); nlattr_add_u8(nw, PF_AA_R_LAST, attrs.r_last); nlattr_add_u8(nw, PF_AA_AF, attrs.af); nlattr_add_string(nw, PF_AA_ANCHOR, attrs.anchor); nlattr_add_pool_addr(nw, PF_AA_ADDR, &attrs.addr); if (!nlmsg_end(nw)) { nlmsg_abort(nw); return (ENOMEM); } return (0); } #define _OUT(_field) offsetof(struct pfioc_ruleset, _field) static const struct nlattr_parser nla_p_ruleset[] = { { .type = PF_RS_PATH, .off = _OUT(path), .arg = (void *)MAXPATHLEN, .cb = nlattr_get_chara }, { .type = PF_RS_NR, .off = _OUT(nr), .cb = nlattr_get_uint32 }, }; static const struct nlfield_parser nlf_p_ruleset[] = { }; NL_DECLARE_PARSER(ruleset_parser, struct genlmsghdr, nlf_p_ruleset, nla_p_ruleset); #undef _OUT static int pf_handle_get_rulesets(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pfioc_ruleset attrs = { 0 }; struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; int error; error = nl_parse_nlmsg(hdr, &ruleset_parser, npt, &attrs); if (error != 0) return (error); error = pf_ioctl_get_rulesets(&attrs); if (error != 0) return (error); if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (ENOMEM); ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_GET_RULESETS; ghdr_new->version 
= 0; ghdr_new->reserved = 0; nlattr_add_u32(nw, PF_RS_NR, attrs.nr); if (!nlmsg_end(nw)) { nlmsg_abort(nw); return (ENOMEM); } return (0); } static int pf_handle_get_ruleset(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pfioc_ruleset attrs = { 0 }; struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; int error; error = nl_parse_nlmsg(hdr, &ruleset_parser, npt, &attrs); if (error) return (error); error = pf_ioctl_get_ruleset(&attrs); if (error != 0) return (error); if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (ENOMEM); ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_GET_RULESET; ghdr_new->version = 0; ghdr_new->reserved = 0; nlattr_add_string(nw, PF_RS_NAME, attrs.name); if (!nlmsg_end(nw)) { nlmsg_abort(nw); return (ENOMEM); } return (0); } static bool nlattr_add_pf_threshold(struct nl_writer *nw, int attrtype, struct pf_threshold *t, int secs) { int off = nlattr_add_nested(nw, attrtype); int diff, conn_rate_count; /* Adjust the connection rate estimate. */ conn_rate_count = t->count; diff = secs - t->last; if (diff >= t->seconds) conn_rate_count = 0; else conn_rate_count -= t->count * diff / t->seconds; nlattr_add_u32(nw, PF_TH_LIMIT, t->limit); nlattr_add_u32(nw, PF_TH_SECONDS, t->seconds); nlattr_add_u32(nw, PF_TH_COUNT, conn_rate_count); nlattr_add_u32(nw, PF_TH_LAST, t->last); nlattr_set_len(nw, off); return (true); } static int pf_handle_get_srcnodes(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; struct pf_ksrc_node *n; struct pf_srchash *sh; int i; int secs; hdr->nlmsg_flags |= NLM_F_MULTI; for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { /* Avoid locking empty rows. 
*/ if (LIST_EMPTY(&sh->nodes)) continue; PF_HASHROW_LOCK(sh); secs = time_uptime; LIST_FOREACH(n, &sh->nodes, entry) { if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { nlmsg_abort(nw); return (ENOMEM); } ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFNL_CMD_GET_SRCNODES; ghdr_new->version = 0; ghdr_new->reserved = 0; nlattr_add_in6_addr(nw, PF_SN_ADDR, &n->addr.v6); nlattr_add_in6_addr(nw, PF_SN_RADDR, &n->raddr.v6); nlattr_add_u32(nw, PF_SN_RULE_NR, n->rule->nr); nlattr_add_u64(nw, PF_SN_BYTES_IN, counter_u64_fetch(n->bytes[0])); nlattr_add_u64(nw, PF_SN_BYTES_OUT, counter_u64_fetch(n->bytes[1])); nlattr_add_u64(nw, PF_SN_PACKETS_IN, counter_u64_fetch(n->packets[0])); nlattr_add_u64(nw, PF_SN_PACKETS_OUT, counter_u64_fetch(n->packets[1])); nlattr_add_u32(nw, PF_SN_STATES, n->states); nlattr_add_u32(nw, PF_SN_CONNECTIONS, n->conn); nlattr_add_u8(nw, PF_SN_AF, n->af); nlattr_add_u8(nw, PF_SN_RULE_TYPE, n->ruletype); nlattr_add_u64(nw, PF_SN_CREATION, secs - n->creation); if (n->expire > secs) nlattr_add_u64(nw, PF_SN_EXPIRE, n->expire - secs); else nlattr_add_u64(nw, PF_SN_EXPIRE, 0); nlattr_add_pf_threshold(nw, PF_SN_CONNECTION_RATE, &n->conn_rate, secs); if (!nlmsg_end(nw)) { PF_HASHROW_UNLOCK(sh); nlmsg_abort(nw); return (ENOMEM); } } PF_HASHROW_UNLOCK(sh); } return (0); } static const struct nlhdr_parser *all_parsers[] = { &state_parser, &addrule_parser, &getrules_parser, &clear_states_parser, &set_statusif_parser, &natlook_parser, &set_debug_parser, &set_timeout_parser, &set_limit_parser, &pool_addr_parser, &add_addr_parser, &ruleset_parser, }; static int family_id; static const struct genl_cmd pf_cmds[] = { { .cmd_num = PFNL_CMD_GETSTATES, .cmd_name = "GETSTATES", .cmd_cb = pf_handle_getstates, .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, .cmd_priv = PRIV_NETINET_PF, }, { .cmd_num = PFNL_CMD_GETCREATORS, .cmd_name = "GETCREATORS", .cmd_cb = pf_handle_getcreators, .cmd_flags = GENL_CMD_CAP_DO | 
GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_START,
		.cmd_name = "START",
		.cmd_cb = pf_handle_start,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_STOP,
		.cmd_name = "STOP",
		.cmd_cb = pf_handle_stop,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_ADDRULE,
		.cmd_name = "ADDRULE",
		.cmd_cb = pf_handle_addrule,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_GETRULES,
		.cmd_name = "GETRULES",
		.cmd_cb = pf_handle_getrules,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_GETRULE,
		.cmd_name = "GETRULE",
		.cmd_cb = pf_handle_getrule,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_CLRSTATES,
		.cmd_name = "CLRSTATES",
		.cmd_cb = pf_handle_clear_states,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_KILLSTATES,
		.cmd_name = "KILLSTATES",
		.cmd_cb = pf_handle_kill_states,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_SET_STATUSIF,
		.cmd_name = "SETSTATUSIF",
		.cmd_cb = pf_handle_set_statusif,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_GET_STATUS,
		.cmd_name = "GETSTATUS",
		.cmd_cb = pf_handle_get_status,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_CLEAR_STATUS,
		.cmd_name = "CLEARSTATUS",
		.cmd_cb = pf_handle_clear_status,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_NATLOOK,
		.cmd_name = "NATLOOK",
		.cmd_cb = pf_handle_natlook,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_SET_DEBUG,
		.cmd_name = "SET_DEBUG",
		.cmd_cb = pf_handle_set_debug,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_SET_TIMEOUT,
		.cmd_name = "SET_TIMEOUT",
		.cmd_cb = pf_handle_set_timeout,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_GET_TIMEOUT,
		.cmd_name = "GET_TIMEOUT",
		.cmd_cb = pf_handle_get_timeout,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_SET_LIMIT,
		.cmd_name = "SET_LIMIT",
		.cmd_cb = pf_handle_set_limit,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_GET_LIMIT,
		.cmd_name = "GET_LIMIT",
		.cmd_cb = pf_handle_get_limit,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_BEGIN_ADDRS,
		.cmd_name = "BEGIN_ADDRS",
		.cmd_cb = pf_handle_begin_addrs,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_ADD_ADDR,
		.cmd_name = "ADD_ADDR",
		.cmd_cb = pf_handle_add_addr,
		.cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_GET_ADDRS,
		.cmd_name = "GET_ADDRS",
		.cmd_cb = pf_handle_get_addrs,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		/*
		 * Single-address lookup: named distinctly from the
		 * PFNL_CMD_GET_ADDRS entry above so the two commands are not
		 * registered under the same name.
		 */
		.cmd_num = PFNL_CMD_GET_ADDR,
		.cmd_name = "GET_ADDR",
		.cmd_cb = pf_handle_get_addr,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_GET_RULESETS,
		.cmd_name = "GET_RULESETS",
		.cmd_cb = pf_handle_get_rulesets,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num = PFNL_CMD_GET_RULESET,
		.cmd_name = "GET_RULESET",
		.cmd_cb = pf_handle_get_ruleset,
		.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL,
		.cmd_priv = PRIV_NETINET_PF,
	},
	{
		.cmd_num =
PFNL_CMD_GET_SRCNODES, .cmd_name = "GET_SRCNODES", .cmd_cb = pf_handle_get_srcnodes, .cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, .cmd_priv = PRIV_NETINET_PF, }, }; void pf_nl_register(void) { NL_VERIFY_PARSERS(all_parsers); family_id = genl_register_family(PFNL_FAMILY_NAME, 0, 2, PFNL_CMD_MAX); - genl_register_cmds(PFNL_FAMILY_NAME, pf_cmds, NL_ARRAY_LEN(pf_cmds)); + genl_register_cmds(PFNL_FAMILY_NAME, pf_cmds, nitems(pf_cmds)); } void pf_nl_unregister(void) { genl_unregister_family(PFNL_FAMILY_NAME); } diff --git a/sys/netpfil/pf/pflow.c b/sys/netpfil/pf/pflow.c index 36b528290306..397d720215b2 100644 --- a/sys/netpfil/pf/pflow.c +++ b/sys/netpfil/pf/pflow.c @@ -1,1850 +1,1851 @@ /* $OpenBSD: if_pflow.c,v 1.100 2023/11/09 08:53:20 mvs Exp $ */ /* * Copyright (c) 2023 Rubicon Communications, LLC (Netgate) * Copyright (c) 2011 Florian Obser * Copyright (c) 2011 Sebastian Benoit * Copyright (c) 2008 Henning Brauer * Copyright (c) 2008 Joerg Goltermann * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER IN * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "net/if_var.h" #define PFLOW_MINMTU \ (sizeof(struct pflow_header) + sizeof(struct pflow_flow)) #ifdef PFLOWDEBUG #define DPRINTF(x) do { printf x ; } while (0) #else #define DPRINTF(x) #endif enum pflow_family_t { PFLOW_INET, PFLOW_INET6, PFLOW_NAT4, }; static void pflow_output_process(void *); static int pflow_create(int); static int pflow_destroy(int, bool); static int pflow_calc_mtu(struct pflow_softc *, int, int); static void pflow_setmtu(struct pflow_softc *, int); static int pflowvalidsockaddr(const struct sockaddr *, int); static struct mbuf *pflow_get_mbuf(struct pflow_softc *, u_int16_t); static void pflow_flush(struct pflow_softc *); static int pflow_sendout_v5(struct pflow_softc *); static int pflow_sendout_ipfix(struct pflow_softc *, enum pflow_family_t); static int pflow_sendout_ipfix_tmpl(struct pflow_softc *); static int pflow_sendout_mbuf(struct pflow_softc *, struct mbuf *); static int sysctl_pflowstats(SYSCTL_HANDLER_ARGS); static void pflow_timeout(void *); static void pflow_timeout6(void *); static void pflow_timeout_tmpl(void *); static void pflow_timeout_nat4(void *); static void copy_flow_data(struct pflow_flow *, struct pflow_flow *, const struct pf_kstate *, struct pf_state_key *, int, int); static void copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *, struct pflow_ipfix_flow4 *, const struct pf_kstate *, struct pf_state_key *, struct pflow_softc *, int, int); static void copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *, struct pflow_ipfix_flow6 *, const struct pf_kstate *, struct pf_state_key *, struct pflow_softc *, int, int); static int pflow_pack_flow(const struct pf_kstate *, struct pf_state_key *, 
struct pflow_softc *); static int pflow_pack_flow_ipfix(const struct pf_kstate *, struct pf_state_key *, struct pflow_softc *); static void export_pflow(const struct pf_kstate *); static int export_pflow_if(const struct pf_kstate*, struct pf_state_key *, struct pflow_softc *); static int copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc); static int copy_flow_ipfix_4_to_m(struct pflow_ipfix_flow4 *flow, struct pflow_softc *sc); static int copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow, struct pflow_softc *sc); static int copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *, const struct pf_kstate *, struct pflow_softc *, uint8_t, uint64_t); static const char pflowname[] = "pflow"; enum pflowstat_counters { pflow_flows, pflow_packets, pflow_onomem, pflow_oerrors, pflow_ncounters, }; struct pflowstats_ctr { counter_u64_t c[pflow_ncounters]; }; /** * Locking concept * * The list of pflow devices (V_pflowif_list) is managed through epoch. * It is safe to read the list without locking (while in NET_EPOCH). * There may only be one simultaneous modifier, hence we need V_pflow_list_mtx * on every add/delete. * * Each pflow interface protects its own data with the sc_lock mutex. * * We do not require any pf locks, and in fact expect to be called without * hashrow locks held. 
**/ VNET_DEFINE(struct unrhdr *, pflow_unr); #define V_pflow_unr VNET(pflow_unr) VNET_DEFINE(CK_LIST_HEAD(, pflow_softc), pflowif_list); #define V_pflowif_list VNET(pflowif_list) VNET_DEFINE(struct mtx, pflowif_list_mtx); #define V_pflowif_list_mtx VNET(pflowif_list_mtx) VNET_DEFINE(struct pflowstats_ctr, pflowstat); #define V_pflowstats VNET(pflowstat) #define PFLOW_LOCK(_sc) mtx_lock(&(_sc)->sc_lock) #define PFLOW_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock) #define PFLOW_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED) SYSCTL_NODE(_net, OID_AUTO, pflow, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "PFLOW"); SYSCTL_PROC(_net_pflow, OID_AUTO, stats, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_pflowstats, "S,pflowstats", "PFLOW statistics (struct pflowstats, net/if_pflow.h)"); static inline void pflowstat_inc(enum pflowstat_counters c) { counter_u64_add(V_pflowstats.c[c], 1); } static void vnet_pflowattach(void) { CK_LIST_INIT(&V_pflowif_list); mtx_init(&V_pflowif_list_mtx, "pflow interface list mtx", NULL, MTX_DEF); V_pflow_unr = new_unrhdr(0, PFLOW_MAX_ENTRIES - 1, &V_pflowif_list_mtx); for (int i = 0; i < pflow_ncounters; i++) V_pflowstats.c[i] = counter_u64_alloc(M_WAITOK); } VNET_SYSINIT(vnet_pflowattach, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, vnet_pflowattach, NULL); static int pflow_jail_remove(void *obj, void *data __unused) { #ifdef VIMAGE const struct prison *pr = obj; #endif struct pflow_softc *sc; CURVNET_SET(pr->pr_vnet); CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) { pflow_destroy(sc->sc_id, false); } CURVNET_RESTORE(); return (0); } static void vnet_pflowdetach(void) { /* Should have been done by pflow_jail_remove() */ MPASS(CK_LIST_EMPTY(&V_pflowif_list)); delete_unrhdr(V_pflow_unr); mtx_destroy(&V_pflowif_list_mtx); for (int i = 0; i < pflow_ncounters; i++) counter_u64_free(V_pflowstats.c[i]); } VNET_SYSUNINIT(vnet_pflowdetach, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, vnet_pflowdetach, NULL); static void vnet_pflow_finalise(void) { /* * Ensure we've 
freed all interfaces, and do not have pending * epoch cleanup calls. */ NET_EPOCH_DRAIN_CALLBACKS(); } VNET_SYSUNINIT(vnet_pflow_finalise, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, vnet_pflow_finalise, NULL); static void pflow_output_process(void *arg) { struct mbufq ml; struct pflow_softc *sc = arg; struct mbuf *m; mbufq_init(&ml, 0); PFLOW_LOCK(sc); mbufq_concat(&ml, &sc->sc_outputqueue); PFLOW_UNLOCK(sc); CURVNET_SET(sc->sc_vnet); while ((m = mbufq_dequeue(&ml)) != NULL) { pflow_sendout_mbuf(sc, m); } CURVNET_RESTORE(); } static int pflow_create(int unit) { struct pflow_softc *pflowif; int error; pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO); mtx_init(&pflowif->sc_lock, "pflowlk", NULL, MTX_DEF); pflowif->sc_version = PFLOW_PROTO_DEFAULT; pflowif->sc_observation_dom = PFLOW_ENGINE_TYPE; /* ipfix template init */ bzero(&pflowif->sc_tmpl_ipfix,sizeof(pflowif->sc_tmpl_ipfix)); pflowif->sc_tmpl_ipfix.set_header.set_id = htons(PFLOW_IPFIX_TMPL_SET_ID); pflowif->sc_tmpl_ipfix.set_header.set_length = htons(sizeof(struct pflow_ipfix_tmpl)); /* ipfix IPv4 template */ pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.tmpl_id = htons(PFLOW_IPFIX_TMPL_IPV4_ID); pflowif->sc_tmpl_ipfix.ipv4_tmpl.h.field_count = htons(PFLOW_IPFIX_TMPL_IPV4_FIELD_COUNT); pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.field_id = htons(PFIX_IE_sourceIPv4Address); pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_ip.len = htons(4); pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.field_id = htons(PFIX_IE_destinationIPv4Address); pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_ip.len = htons(4); pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.field_id = htons(PFIX_IE_ingressInterface); pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_in.len = htons(4); pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.field_id = htons(PFIX_IE_egressInterface); pflowif->sc_tmpl_ipfix.ipv4_tmpl.if_index_out.len = htons(4); pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.field_id = htons(PFIX_IE_packetDeltaCount); pflowif->sc_tmpl_ipfix.ipv4_tmpl.packets.len = htons(8); 
pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.field_id = htons(PFIX_IE_octetDeltaCount); pflowif->sc_tmpl_ipfix.ipv4_tmpl.octets.len = htons(8); pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.field_id = htons(PFIX_IE_flowStartMilliseconds); pflowif->sc_tmpl_ipfix.ipv4_tmpl.start.len = htons(8); pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.field_id = htons(PFIX_IE_flowEndMilliseconds); pflowif->sc_tmpl_ipfix.ipv4_tmpl.finish.len = htons(8); pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.field_id = htons(PFIX_IE_sourceTransportPort); pflowif->sc_tmpl_ipfix.ipv4_tmpl.src_port.len = htons(2); pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.field_id = htons(PFIX_IE_destinationTransportPort); pflowif->sc_tmpl_ipfix.ipv4_tmpl.dest_port.len = htons(2); pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.field_id = htons(PFIX_IE_ipClassOfService); pflowif->sc_tmpl_ipfix.ipv4_tmpl.tos.len = htons(1); pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.field_id = htons(PFIX_IE_protocolIdentifier); pflowif->sc_tmpl_ipfix.ipv4_tmpl.protocol.len = htons(1); /* ipfix IPv6 template */ pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.tmpl_id = htons(PFLOW_IPFIX_TMPL_IPV6_ID); pflowif->sc_tmpl_ipfix.ipv6_tmpl.h.field_count = htons(PFLOW_IPFIX_TMPL_IPV6_FIELD_COUNT); pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.field_id = htons(PFIX_IE_sourceIPv6Address); pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_ip.len = htons(16); pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.field_id = htons(PFIX_IE_destinationIPv6Address); pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_ip.len = htons(16); pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.field_id = htons(PFIX_IE_ingressInterface); pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_in.len = htons(4); pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.field_id = htons(PFIX_IE_egressInterface); pflowif->sc_tmpl_ipfix.ipv6_tmpl.if_index_out.len = htons(4); pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.field_id = htons(PFIX_IE_packetDeltaCount); pflowif->sc_tmpl_ipfix.ipv6_tmpl.packets.len = htons(8); pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.field_id = 
htons(PFIX_IE_octetDeltaCount); pflowif->sc_tmpl_ipfix.ipv6_tmpl.octets.len = htons(8); pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.field_id = htons(PFIX_IE_flowStartMilliseconds); pflowif->sc_tmpl_ipfix.ipv6_tmpl.start.len = htons(8); pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.field_id = htons(PFIX_IE_flowEndMilliseconds); pflowif->sc_tmpl_ipfix.ipv6_tmpl.finish.len = htons(8); pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.field_id = htons(PFIX_IE_sourceTransportPort); pflowif->sc_tmpl_ipfix.ipv6_tmpl.src_port.len = htons(2); pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.field_id = htons(PFIX_IE_destinationTransportPort); pflowif->sc_tmpl_ipfix.ipv6_tmpl.dest_port.len = htons(2); pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.field_id = htons(PFIX_IE_ipClassOfService); pflowif->sc_tmpl_ipfix.ipv6_tmpl.tos.len = htons(1); pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.field_id = htons(PFIX_IE_protocolIdentifier); pflowif->sc_tmpl_ipfix.ipv6_tmpl.protocol.len = htons(1); /* NAT44 create template */ pflowif->sc_tmpl_ipfix.nat44_tmpl.h.tmpl_id = htons(PFLOW_IPFIX_TMPL_NAT44_ID); pflowif->sc_tmpl_ipfix.nat44_tmpl.h.field_count = htons(PFLOW_IPFIX_TMPL_NAT44_FIELD_COUNT); pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.field_id = htons(PFIX_IE_timeStamp); pflowif->sc_tmpl_ipfix.nat44_tmpl.timestamp.len = htons(8); pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.field_id = htons(PFIX_IE_natEvent); pflowif->sc_tmpl_ipfix.nat44_tmpl.nat_event.len = htons(1); pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.field_id = htons(PFIX_IE_protocolIdentifier); pflowif->sc_tmpl_ipfix.nat44_tmpl.protocol.len = htons(1); pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.field_id = htons(PFIX_IE_sourceIPv4Address); pflowif->sc_tmpl_ipfix.nat44_tmpl.src_ip.len = htons(4); pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.field_id = htons(PFIX_IE_sourceTransportPort); pflowif->sc_tmpl_ipfix.nat44_tmpl.src_port.len = htons(2); pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.field_id = htons(PFIX_IE_postNATSourceIPv4Address); 
pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_ip.len = htons(4); pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.field_id = htons(PFIX_IE_postNAPTSourceTransportPort); pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_src_port.len = htons(2); pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.field_id = htons(PFIX_IE_destinationIPv4Address); pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_ip.len = htons(4); pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.field_id = htons(PFIX_IE_destinationTransportPort); pflowif->sc_tmpl_ipfix.nat44_tmpl.dst_port.len = htons(2); pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.field_id = htons(PFIX_IE_postNATDestinationIPv4Address); pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_ip.len = htons(4); pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.field_id = htons(PFIX_IE_postNAPTDestinationTransportPort); pflowif->sc_tmpl_ipfix.nat44_tmpl.postnat_dst_port.len = htons(2); pflowif->sc_id = unit; pflowif->sc_vnet = curvnet; mbufq_init(&pflowif->sc_outputqueue, 8192); pflow_setmtu(pflowif, ETHERMTU); callout_init_mtx(&pflowif->sc_tmo, &pflowif->sc_lock, 0); callout_init_mtx(&pflowif->sc_tmo6, &pflowif->sc_lock, 0); callout_init_mtx(&pflowif->sc_tmo_nat4, &pflowif->sc_lock, 0); callout_init_mtx(&pflowif->sc_tmo_tmpl, &pflowif->sc_lock, 0); error = swi_add(&pflowif->sc_swi_ie, pflowname, pflow_output_process, pflowif, SWI_NET, INTR_MPSAFE, &pflowif->sc_swi_cookie); if (error) { free(pflowif, M_DEVBUF); return (error); } /* Insert into list of pflows */ mtx_lock(&V_pflowif_list_mtx); CK_LIST_INSERT_HEAD(&V_pflowif_list, pflowif, sc_next); mtx_unlock(&V_pflowif_list_mtx); V_pflow_export_state_ptr = export_pflow; return (0); } static void pflow_free_cb(struct epoch_context *ctx) { struct pflow_softc *sc; sc = __containerof(ctx, struct pflow_softc, sc_epoch_ctx); free(sc, M_DEVBUF); } static int pflow_destroy(int unit, bool drain) { struct pflow_softc *sc; int error __diagused; mtx_lock(&V_pflowif_list_mtx); CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) { if (sc->sc_id 
== unit)
			break;
	}
	if (sc == NULL) {
		mtx_unlock(&V_pflowif_list_mtx);
		return (ENOENT);
	}
	CK_LIST_REMOVE(sc, sc_next);
	/* Last interface gone: unhook the pf export callback. */
	if (CK_LIST_EMPTY(&V_pflowif_list))
		V_pflow_export_state_ptr = NULL;
	mtx_unlock(&V_pflowif_list_mtx);

	sc->sc_dying = 1;

	if (drain) {
		/* Let's be sure no one is using this interface any more. */
		NET_EPOCH_DRAIN_CALLBACKS();
	}

	error = swi_remove(sc->sc_swi_cookie);
	MPASS(error == 0);
	error = intr_event_destroy(sc->sc_swi_ie);
	MPASS(error == 0);

	callout_drain(&sc->sc_tmo);
	callout_drain(&sc->sc_tmo6);
	callout_drain(&sc->sc_tmo_nat4);
	callout_drain(&sc->sc_tmo_tmpl);

	/* Drop any partially filled export packets. */
	m_freem(sc->sc_mbuf);
	m_freem(sc->sc_mbuf6);
	m_freem(sc->sc_mbuf_nat4);

	PFLOW_LOCK(sc);
	mbufq_drain(&sc->sc_outputqueue);
	if (sc->so != NULL) {
		soclose(sc->so);
		sc->so = NULL;
	}
	if (sc->sc_flowdst != NULL)
		free(sc->sc_flowdst, M_DEVBUF);
	if (sc->sc_flowsrc != NULL)
		free(sc->sc_flowsrc, M_DEVBUF);
	PFLOW_UNLOCK(sc);

	mtx_destroy(&sc->sc_lock);

	free_unr(V_pflow_unr, unit);

	/* The softc itself is freed from pflow_free_cb via NET_EPOCH_CALL. */
	NET_EPOCH_CALL(pflow_free_cb, &sc->sc_epoch_ctx);

	return (0);
}

/*
 * Check whether 'sa' is a usable AF_INET/AF_INET6 address.
 *
 * Returns non-zero when the address is not the unspecified address and,
 * unless 'ignore_port' is set, the port is non-zero.  Returns 0 for NULL
 * and for any other address family.
 */
static int
pflowvalidsockaddr(const struct sockaddr *sa, int ignore_port)
{
	const struct sockaddr_in6 *sin6;
	const struct sockaddr_in *sin;

	if (sa == NULL)
		return (0);
	switch(sa->sa_family) {
	case AF_INET:
		sin = (const struct sockaddr_in *)sa;
		return (sin->sin_addr.s_addr != INADDR_ANY &&
		    (ignore_port || sin->sin_port != 0));
	case AF_INET6:
		sin6 = (const struct sockaddr_in6 *)sa;
		return (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
		    (ignore_port || sin6->sin6_port != 0));
	default:
		return (0);
	}
}

/*
 * Compute how many IPFIX IPv4, IPv6 and NAT4 records fit in 'mtu' after
 * 'hdrsz' and a struct udpiphdr, capping each at PFLOW_MAXFLOWS, and store
 * the results in sc_maxcount4/sc_maxcount6/sc_maxcount_nat4.
 *
 * Returns hdrsz + sizeof(struct udpiphdr) + the smallest of the three
 * resulting payload sizes.
 */
int
pflow_calc_mtu(struct pflow_softc *sc, int mtu, int hdrsz)
{
	size_t min;

	sc->sc_maxcount4 = (mtu - hdrsz -
	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow4);
	sc->sc_maxcount6 = (mtu - hdrsz -
	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow6);
	sc->sc_maxcount_nat4 = (mtu - hdrsz -
	    sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_nat4);
	if (sc->sc_maxcount4 > PFLOW_MAXFLOWS)
		sc->sc_maxcount4 = PFLOW_MAXFLOWS;
	if (sc->sc_maxcount6 > PFLOW_MAXFLOWS)
		sc->sc_maxcount6 = PFLOW_MAXFLOWS;
	if (sc->sc_maxcount_nat4 > PFLOW_MAXFLOWS)
		sc->sc_maxcount_nat4 = PFLOW_MAXFLOWS;

	min = MIN(sc->sc_maxcount4 * sizeof(struct pflow_ipfix_flow4),
	    sc->sc_maxcount6 * sizeof(struct pflow_ipfix_flow6));
	min = MIN(min, sc->sc_maxcount_nat4 * sizeof(struct pflow_ipfix_nat4));

	return (hdrsz + sizeof(struct udpiphdr) + min);
}

/*
 * Recompute the per-packet record limits for 'mtu_req' according to the
 * protocol version configured on 'sc'.
 */
static void
pflow_setmtu(struct pflow_softc *sc, int mtu_req)
{
	int	mtu;

	mtu = mtu_req;

	switch (sc->sc_version) {
	case PFLOW_PROTO_5:
		sc->sc_maxcount = (mtu - sizeof(struct pflow_header) -
		    sizeof(struct udpiphdr)) / sizeof(struct pflow_flow);
		if (sc->sc_maxcount > PFLOW_MAXFLOWS)
		    sc->sc_maxcount = PFLOW_MAXFLOWS;
		break;
	case PFLOW_PROTO_10:
		/* Return value (the resulting packet size) is not used here. */
		pflow_calc_mtu(sc, mtu, sizeof(struct pflow_v10_header));
		break;
	default: /* NOTREACHED */
		break;
	}
}

/*
 * Allocate a cluster-backed packet header mbuf for a new export packet.
 *
 * With sc == NULL only the empty mbuf is returned.  Otherwise the mbuf is
 * pre-filled with a pflow_header (v5, which also resets sc_count and arms
 * sc_tmo) or a pflow_set_header carrying 'set_id' (v10).
 *
 * Returns NULL and bumps the pflow_onomem counter on allocation failure.
 */
static struct mbuf *
pflow_get_mbuf(struct pflow_softc *sc, u_int16_t set_id)
{
	struct pflow_set_header	 set_hdr;
	struct pflow_header	 h;
	struct mbuf		*m;

	MGETHDR(m, M_NOWAIT, MT_DATA);
	if (m == NULL) {
		pflowstat_inc(pflow_onomem);
		return (NULL);
	}

	MCLGET(m, M_NOWAIT);
	/* No M_EXT means the cluster could not be attached. */
	if ((m->m_flags & M_EXT) == 0) {
		m_free(m);
		pflowstat_inc(pflow_onomem);
		return (NULL);
	}

	m->m_len = m->m_pkthdr.len = 0;

	if (sc == NULL)		/* get only a new empty mbuf */
		return (m);

	switch (sc->sc_version) {
	case PFLOW_PROTO_5:
		/* populate pflow_header */
		h.reserved1 = 0;
		h.reserved2 = 0;
		h.count = 0;
		h.version = htons(PFLOW_PROTO_5);
		h.flow_sequence = htonl(sc->sc_gcounter);
		h.engine_type = PFLOW_ENGINE_TYPE;
		h.engine_id = PFLOW_ENGINE_ID;
		m_copyback(m, 0, PFLOW_HDRLEN, (caddr_t)&h);

		sc->sc_count = 0;
		callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz,
		    pflow_timeout, sc);
		break;
	case PFLOW_PROTO_10:
		/* populate pflow_set_header */
		set_hdr.set_length = 0;
		set_hdr.set_id = htons(set_id);
		m_copyback(m, 0, PFLOW_SET_HDRLEN, (caddr_t)&set_hdr);
		break;
	default: /* NOTREACHED */
		break;
	}

	return (m);
}

static void
copy_flow_data(struct pflow_flow *flow1, struct pflow_flow *flow2,
    const struct pf_kstate *st, struct
pf_state_key *sk, int src, int dst)
{
	/*
	 * flow1 and flow2 describe the two directions of the same state:
	 * addresses, ports and interface indexes are mirrored, and the
	 * counters come from index 0 and index 1 of st->packets/st->bytes.
	 */
	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
	flow1->src_port = flow2->dest_port = sk->port[src];
	flow1->dest_ip =  flow2->src_ip = sk->addr[dst].v4.s_addr;
	flow1->dest_port = flow2->src_port = sk->port[dst];

	flow1->dest_as = flow2->src_as =
	    flow1->src_as = flow2->dest_as = 0;
	flow1->if_index_in = htons(st->if_index_in);
	flow1->if_index_out = htons(st->if_index_out);
	flow2->if_index_in = htons(st->if_index_out);
	flow2->if_index_out = htons(st->if_index_in);
	flow1->dest_mask = flow2->src_mask =
	    flow1->src_mask = flow2->dest_mask = 0;

	flow1->flow_packets = htonl(st->packets[0]);
	flow2->flow_packets = htonl(st->packets[1]);
	flow1->flow_octets = htonl(st->bytes[0]);
	flow2->flow_octets = htonl(st->bytes[1]);

	/*
	 * Pretend the flow was created or expired when the machine came up
	 * when creation is in the future of the last time a packet was seen
	 * or was created / expired before this machine came up due to pfsync.
	 */
	flow1->flow_start = flow2->flow_start = st->creation < 0 ||
	    st->creation > st->expire ? htonl(0) : htonl(st->creation);
	flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? htonl(0) :
	    htonl(st->expire);
	flow1->tcp_flags = flow2->tcp_flags = 0;
	flow1->protocol = flow2->protocol = sk->proto;
	flow1->tos = flow2->tos = st->rule->tos;
}

/*
 * Fill a pair of IPFIX IPv4 flow records from state 'st' and key 'sk'.
 *
 * 'src' and 'dst' select which slot of sk->addr[]/sk->port[] is reported
 * as the source and destination of flow1; flow2 is the mirrored direction.
 * 'sc' is not referenced in this function.
 */
static void
copy_flow_ipfix_4_data(struct pflow_ipfix_flow4 *flow1,
    struct pflow_ipfix_flow4 *flow2, const struct pf_kstate *st,
    struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
{
	flow1->src_ip = flow2->dest_ip = sk->addr[src].v4.s_addr;
	flow1->src_port = flow2->dest_port = sk->port[src];
	flow1->dest_ip =  flow2->src_ip = sk->addr[dst].v4.s_addr;
	flow1->dest_port = flow2->src_port = sk->port[dst];

	flow1->if_index_in = htonl(st->if_index_in);
	flow1->if_index_out = htonl(st->if_index_out);
	flow2->if_index_in = htonl(st->if_index_out);
	flow2->if_index_out = htonl(st->if_index_in);

	flow1->flow_packets = htobe64(st->packets[0]);
	flow2->flow_packets = htobe64(st->packets[1]);
	flow1->flow_octets = htobe64(st->bytes[0]);
	flow2->flow_octets = htobe64(st->bytes[1]);

	/*
	 * Pretend the flow was created when the machine came up when creation
	 * is in the future of the last time a packet was seen due to pfsync.
	 */
	if (st->creation > st->expire)
		flow1->flow_start = flow2->flow_start =
		    htobe64((time_second - time_uptime)*1000);
	else
		flow1->flow_start = flow2->flow_start =
		    htobe64((pf_get_time() -
		    (pf_get_uptime() - st->creation)));
	flow1->flow_finish = flow2->flow_finish =
	    htobe64((pf_get_time() - (pf_get_uptime() - st->expire)));

	flow1->protocol = flow2->protocol = sk->proto;
	flow1->tos = flow2->tos = st->rule->tos;
}

/*
 * IPv6 counterpart of copy_flow_ipfix_4_data(): same field layout, but the
 * addresses are copied from the v6 member of sk->addr[] with bcopy().
 * 'sc' is not referenced in this function.
 */
static void
copy_flow_ipfix_6_data(struct pflow_ipfix_flow6 *flow1,
    struct pflow_ipfix_flow6 *flow2, const struct pf_kstate *st,
    struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
{
	bcopy(&sk->addr[src].v6, &flow1->src_ip, sizeof(flow1->src_ip));
	bcopy(&sk->addr[src].v6, &flow2->dest_ip, sizeof(flow2->dest_ip));
	flow1->src_port = flow2->dest_port = sk->port[src];
	bcopy(&sk->addr[dst].v6, &flow1->dest_ip, sizeof(flow1->dest_ip));
	bcopy(&sk->addr[dst].v6, &flow2->src_ip, sizeof(flow2->src_ip));
	flow1->dest_port = flow2->src_port = sk->port[dst];

	flow1->if_index_in = htonl(st->if_index_in);
	flow1->if_index_out = htonl(st->if_index_out);
	flow2->if_index_in = htonl(st->if_index_out);
	flow2->if_index_out = htonl(st->if_index_in);

	flow1->flow_packets = htobe64(st->packets[0]);
	flow2->flow_packets = htobe64(st->packets[1]);
	flow1->flow_octets = htobe64(st->bytes[0]);
	flow2->flow_octets = htobe64(st->bytes[1]);

	/*
	 * Pretend the flow was created when the machine came up when creation
	 * is in the future of the last time a packet was seen due to pfsync.
	 */
	if (st->creation > st->expire)
		flow1->flow_start = flow2->flow_start =
		    htobe64((time_second - time_uptime)*1000);
	else
		flow1->flow_start = flow2->flow_start =
		    htobe64((pf_get_time() -
		    (pf_get_uptime() - st->creation)));
	flow1->flow_finish = flow2->flow_finish =
	    htobe64((pf_get_time() - (pf_get_uptime() - st->expire)));

	flow1->protocol = flow2->protocol = sk->proto;
	flow1->tos = flow2->tos = st->rule->tos;
}

/*
 * Fill a pair of IPFIX NAT44 records from state 'st'.
 *
 * The plain address/port fields are taken from st->key[PF_SK_STACK] and
 * the postnat_* fields from st->key[PF_SK_WIRE]; nat2 is the mirror image
 * of nat1.  Only the protocol is read from 'sk'; 'sc' is not referenced.
 */
static void
copy_nat_ipfix_4_data(struct pflow_ipfix_nat4 *nat1,
    struct pflow_ipfix_nat4 *nat2, const struct pf_kstate *st,
    struct pf_state_key *sk, struct pflow_softc *sc, int src, int dst)
{
	nat1->src_ip = nat2->dest_ip = st->key[PF_SK_STACK]->addr[src].v4.s_addr;
	nat1->src_port = nat2->dest_port = st->key[PF_SK_STACK]->port[src];
	nat1->dest_ip = nat2->src_ip = st->key[PF_SK_STACK]->addr[dst].v4.s_addr;
	nat1->dest_port = nat2->src_port = st->key[PF_SK_STACK]->port[dst];
	nat1->postnat_src_ip = nat2->postnat_dest_ip = st->key[PF_SK_WIRE]->addr[src].v4.s_addr;
	nat1->postnat_src_port = nat2->postnat_dest_port = st->key[PF_SK_WIRE]->port[src];
	nat1->postnat_dest_ip = nat2->postnat_src_ip = st->key[PF_SK_WIRE]->addr[dst].v4.s_addr;
	nat1->postnat_dest_port = nat2->postnat_src_port = st->key[PF_SK_WIRE]->port[dst];
	nat1->protocol = nat2->protocol = sk->proto;

	/*
	 * Because we have to generate a create and delete event we'll fill out the
	 * timestamp and nat_event fields when we transmit. As opposed to doing this
	 * work a second time.
	 */
}

/*
 * Export state 'st' on every pflow interface in V_pflowif_list.
 * Asserts that the caller is inside the network epoch.
 */
static void
export_pflow(const struct pf_kstate *st)
{
	struct pflow_softc	*sc = NULL;
	struct pf_state_key	*sk;

	NET_EPOCH_ASSERT();

	/* e.g. if pf_state_key_attach() fails. */
	if (st->key[PF_SK_STACK] == NULL || st->key[PF_SK_WIRE] == NULL)
		return;

	sk = st->key[st->direction == PF_IN ?
PF_SK_WIRE : PF_SK_STACK]; CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) { PFLOW_LOCK(sc); switch (sc->sc_version) { case PFLOW_PROTO_5: if (sk->af == AF_INET) export_pflow_if(st, sk, sc); break; case PFLOW_PROTO_10: if (sk->af == AF_INET || sk->af == AF_INET6) export_pflow_if(st, sk, sc); break; default: /* NOTREACHED */ break; } PFLOW_UNLOCK(sc); } } static int export_pflow_if(const struct pf_kstate *st, struct pf_state_key *sk, struct pflow_softc *sc) { struct pf_kstate pfs_copy; u_int64_t bytes[2]; int ret = 0; if (sc->sc_version == PFLOW_PROTO_10) return (pflow_pack_flow_ipfix(st, sk, sc)); /* PFLOW_PROTO_5 */ if ((st->bytes[0] < (u_int64_t)PFLOW_MAXBYTES) && (st->bytes[1] < (u_int64_t)PFLOW_MAXBYTES)) return (pflow_pack_flow(st, sk, sc)); /* flow > PFLOW_MAXBYTES need special handling */ bcopy(st, &pfs_copy, sizeof(pfs_copy)); bytes[0] = pfs_copy.bytes[0]; bytes[1] = pfs_copy.bytes[1]; while (bytes[0] > PFLOW_MAXBYTES) { pfs_copy.bytes[0] = PFLOW_MAXBYTES; pfs_copy.bytes[1] = 0; if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0) return (ret); if ((bytes[0] - PFLOW_MAXBYTES) > 0) bytes[0] -= PFLOW_MAXBYTES; } while (bytes[1] > (u_int64_t)PFLOW_MAXBYTES) { pfs_copy.bytes[1] = PFLOW_MAXBYTES; pfs_copy.bytes[0] = 0; if ((ret = pflow_pack_flow(&pfs_copy, sk, sc)) != 0) return (ret); if ((bytes[1] - PFLOW_MAXBYTES) > 0) bytes[1] -= PFLOW_MAXBYTES; } pfs_copy.bytes[0] = bytes[0]; pfs_copy.bytes[1] = bytes[1]; return (pflow_pack_flow(&pfs_copy, sk, sc)); } static int copy_flow_to_m(struct pflow_flow *flow, struct pflow_softc *sc) { int ret = 0; PFLOW_ASSERT(sc); if (sc->sc_mbuf == NULL) { if ((sc->sc_mbuf = pflow_get_mbuf(sc, 0)) == NULL) return (ENOBUFS); } m_copyback(sc->sc_mbuf, PFLOW_HDRLEN + (sc->sc_count * sizeof(struct pflow_flow)), sizeof(struct pflow_flow), (caddr_t)flow); pflowstat_inc(pflow_flows); sc->sc_gcounter++; sc->sc_count++; if (sc->sc_count >= sc->sc_maxcount) ret = pflow_sendout_v5(sc); return(ret); } static int copy_flow_ipfix_4_to_m(struct 
pflow_ipfix_flow4 *flow, struct pflow_softc *sc) { int ret = 0; PFLOW_ASSERT(sc); if (sc->sc_mbuf == NULL) { if ((sc->sc_mbuf = pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV4_ID)) == NULL) { return (ENOBUFS); } sc->sc_count4 = 0; callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz, pflow_timeout, sc); } m_copyback(sc->sc_mbuf, PFLOW_SET_HDRLEN + (sc->sc_count4 * sizeof(struct pflow_ipfix_flow4)), sizeof(struct pflow_ipfix_flow4), (caddr_t)flow); pflowstat_inc(pflow_flows); sc->sc_gcounter++; sc->sc_count4++; if (sc->sc_count4 >= sc->sc_maxcount4) ret = pflow_sendout_ipfix(sc, PFLOW_INET); return(ret); } static int copy_flow_ipfix_6_to_m(struct pflow_ipfix_flow6 *flow, struct pflow_softc *sc) { int ret = 0; PFLOW_ASSERT(sc); if (sc->sc_mbuf6 == NULL) { if ((sc->sc_mbuf6 = pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV6_ID)) == NULL) { return (ENOBUFS); } sc->sc_count6 = 0; callout_reset(&sc->sc_tmo6, PFLOW_TIMEOUT * hz, pflow_timeout6, sc); } m_copyback(sc->sc_mbuf6, PFLOW_SET_HDRLEN + (sc->sc_count6 * sizeof(struct pflow_ipfix_flow6)), sizeof(struct pflow_ipfix_flow6), (caddr_t)flow); pflowstat_inc(pflow_flows); sc->sc_gcounter++; sc->sc_count6++; if (sc->sc_count6 >= sc->sc_maxcount6) ret = pflow_sendout_ipfix(sc, PFLOW_INET6); return(ret); } int copy_nat_ipfix_4_to_m(struct pflow_ipfix_nat4 *nat, const struct pf_kstate *st, struct pflow_softc *sc, uint8_t event, uint64_t timestamp) { int ret = 0; PFLOW_ASSERT(sc); if (sc->sc_mbuf_nat4 == NULL) { if ((sc->sc_mbuf_nat4 = pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_NAT44_ID)) == NULL) { return (ENOBUFS); } sc->sc_count_nat4 = 0; callout_reset(&sc->sc_tmo, PFLOW_TIMEOUT * hz, pflow_timeout_nat4, sc); } nat->nat_event = event; nat->timestamp = htobe64(pf_get_time() - (pf_get_uptime() - timestamp)); m_copyback(sc->sc_mbuf_nat4, PFLOW_SET_HDRLEN + (sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4)), sizeof(struct pflow_ipfix_nat4), (caddr_t)nat); sc->sc_count_nat4++; pflowstat_inc(pflow_flows); sc->sc_gcounter++; if (sc->sc_count_nat4 >= 
sc->sc_maxcount_nat4) ret = pflow_sendout_ipfix(sc, PFLOW_NAT4); return (ret); } static int pflow_pack_flow(const struct pf_kstate *st, struct pf_state_key *sk, struct pflow_softc *sc) { struct pflow_flow flow1; struct pflow_flow flow2; int ret = 0; bzero(&flow1, sizeof(flow1)); bzero(&flow2, sizeof(flow2)); if (st->direction == PF_OUT) copy_flow_data(&flow1, &flow2, st, sk, 1, 0); else copy_flow_data(&flow1, &flow2, st, sk, 0, 1); if (st->bytes[0] != 0) /* first flow from state */ ret = copy_flow_to_m(&flow1, sc); if (st->bytes[1] != 0) /* second flow from state */ ret = copy_flow_to_m(&flow2, sc); return (ret); } static bool pflow_is_natd(const struct pf_kstate *st) { /* If ports or addresses are different we've been NAT-ed. */ return (memcmp(st->key[PF_SK_WIRE], st->key[PF_SK_STACK], sizeof(struct pf_addr) * 2 + sizeof(uint16_t) * 2) != 0); } static int pflow_pack_flow_ipfix(const struct pf_kstate *st, struct pf_state_key *sk, struct pflow_softc *sc) { struct pflow_ipfix_flow4 flow4_1, flow4_2; struct pflow_ipfix_nat4 nat4_1, nat4_2; struct pflow_ipfix_flow6 flow6_1, flow6_2; int ret = 0; bool nat = false; switch (sk->af) { case AF_INET: bzero(&flow4_1, sizeof(flow4_1)); bzero(&flow4_2, sizeof(flow4_2)); nat = pflow_is_natd(st); if (st->direction == PF_OUT) copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc, 1, 0); else copy_flow_ipfix_4_data(&flow4_1, &flow4_2, st, sk, sc, 0, 1); if (nat) copy_nat_ipfix_4_data(&nat4_1, &nat4_2, st, sk, sc, 1, 0); if (st->bytes[0] != 0) /* first flow from state */ { ret = copy_flow_ipfix_4_to_m(&flow4_1, sc); if (ret == 0 && nat) { ret = copy_nat_ipfix_4_to_m(&nat4_1, st, sc, PFIX_NAT_EVENT_SESSION_CREATE, st->creation); ret |= copy_nat_ipfix_4_to_m(&nat4_1, st, sc, PFIX_NAT_EVENT_SESSION_DELETE, st->expire); } } if (st->bytes[1] != 0) /* second flow from state */ { ret = copy_flow_ipfix_4_to_m(&flow4_2, sc); if (ret == 0 && nat) { ret = copy_nat_ipfix_4_to_m(&nat4_2, st, sc, PFIX_NAT_EVENT_SESSION_CREATE, st->creation); ret 
|= copy_nat_ipfix_4_to_m(&nat4_2, st, sc, PFIX_NAT_EVENT_SESSION_DELETE, st->expire); } } break; case AF_INET6: bzero(&flow6_1, sizeof(flow6_1)); bzero(&flow6_2, sizeof(flow6_2)); if (st->direction == PF_OUT) copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc, 1, 0); else copy_flow_ipfix_6_data(&flow6_1, &flow6_2, st, sk, sc, 0, 1); if (st->bytes[0] != 0) /* first flow from state */ ret = copy_flow_ipfix_6_to_m(&flow6_1, sc); if (st->bytes[1] != 0) /* second flow from state */ ret = copy_flow_ipfix_6_to_m(&flow6_2, sc); break; } return (ret); } static void pflow_timeout(void *v) { struct pflow_softc *sc = v; PFLOW_ASSERT(sc); CURVNET_SET(sc->sc_vnet); switch (sc->sc_version) { case PFLOW_PROTO_5: pflow_sendout_v5(sc); break; case PFLOW_PROTO_10: pflow_sendout_ipfix(sc, PFLOW_INET); break; default: /* NOTREACHED */ panic("Unsupported version %d", sc->sc_version); break; } CURVNET_RESTORE(); } static void pflow_timeout6(void *v) { struct pflow_softc *sc = v; PFLOW_ASSERT(sc); if (sc->sc_version != PFLOW_PROTO_10) return; CURVNET_SET(sc->sc_vnet); pflow_sendout_ipfix(sc, PFLOW_INET6); CURVNET_RESTORE(); } static void pflow_timeout_tmpl(void *v) { struct pflow_softc *sc = v; PFLOW_ASSERT(sc); if (sc->sc_version != PFLOW_PROTO_10) return; CURVNET_SET(sc->sc_vnet); pflow_sendout_ipfix_tmpl(sc); CURVNET_RESTORE(); } static void pflow_timeout_nat4(void *v) { struct pflow_softc *sc = v; PFLOW_ASSERT(sc); if (sc->sc_version != PFLOW_PROTO_10) return; CURVNET_SET(sc->sc_vnet); pflow_sendout_ipfix(sc, PFLOW_NAT4); CURVNET_RESTORE(); } static void pflow_flush(struct pflow_softc *sc) { PFLOW_ASSERT(sc); switch (sc->sc_version) { case PFLOW_PROTO_5: pflow_sendout_v5(sc); break; case PFLOW_PROTO_10: pflow_sendout_ipfix(sc, PFLOW_INET); pflow_sendout_ipfix(sc, PFLOW_INET6); pflow_sendout_ipfix(sc, PFLOW_NAT4); break; default: /* NOTREACHED */ break; } } static int pflow_sendout_v5(struct pflow_softc *sc) { struct mbuf *m = sc->sc_mbuf; struct pflow_header *h; struct timespec tv; 
PFLOW_ASSERT(sc); if (m == NULL) return (0); sc->sc_mbuf = NULL; pflowstat_inc(pflow_packets); h = mtod(m, struct pflow_header *); h->count = htons(sc->sc_count); /* populate pflow_header */ h->uptime_ms = htonl(time_uptime * 1000); getnanotime(&tv); h->time_sec = htonl(tv.tv_sec); /* XXX 2038 */ h->time_nanosec = htonl(tv.tv_nsec); if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0) swi_sched(sc->sc_swi_cookie, 0); return (0); } static int pflow_sendout_ipfix(struct pflow_softc *sc, enum pflow_family_t af) { struct mbuf *m; struct pflow_v10_header *h10; struct pflow_set_header *set_hdr; u_int32_t count; int set_length; PFLOW_ASSERT(sc); switch (af) { case PFLOW_INET: m = sc->sc_mbuf; callout_stop(&sc->sc_tmo); if (m == NULL) return (0); sc->sc_mbuf = NULL; count = sc->sc_count4; set_length = sizeof(struct pflow_set_header) + sc->sc_count4 * sizeof(struct pflow_ipfix_flow4); break; case PFLOW_INET6: m = sc->sc_mbuf6; callout_stop(&sc->sc_tmo6); if (m == NULL) return (0); sc->sc_mbuf6 = NULL; count = sc->sc_count6; set_length = sizeof(struct pflow_set_header) + sc->sc_count6 * sizeof(struct pflow_ipfix_flow6); break; case PFLOW_NAT4: m = sc->sc_mbuf_nat4; callout_stop(&sc->sc_tmo_nat4); if (m == NULL) return (0); sc->sc_mbuf_nat4 = NULL; count = sc->sc_count_nat4; set_length = sizeof(struct pflow_set_header) + sc->sc_count_nat4 * sizeof(struct pflow_ipfix_nat4); break; default: panic("Unsupported AF %d", af); } pflowstat_inc(pflow_packets); set_hdr = mtod(m, struct pflow_set_header *); set_hdr->set_length = htons(set_length); /* populate pflow_header */ M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT); if (m == NULL) { pflowstat_inc(pflow_onomem); return (ENOBUFS); } h10 = mtod(m, struct pflow_v10_header *); h10->version = htons(PFLOW_PROTO_10); h10->length = htons(PFLOW_IPFIX_HDRLEN + set_length); h10->time_sec = htonl(time_second); /* XXX 2038 */ h10->flow_sequence = htonl(sc->sc_sequence); sc->sc_sequence += count; h10->observation_dom = 
htonl(sc->sc_observation_dom); if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0) swi_sched(sc->sc_swi_cookie, 0); return (0); } static int pflow_sendout_ipfix_tmpl(struct pflow_softc *sc) { struct mbuf *m; struct pflow_v10_header *h10; PFLOW_ASSERT(sc); m = pflow_get_mbuf(sc, 0); if (m == NULL) return (0); m_copyback(m, 0, sizeof(struct pflow_ipfix_tmpl), (caddr_t)&sc->sc_tmpl_ipfix); pflowstat_inc(pflow_packets); /* populate pflow_header */ M_PREPEND(m, sizeof(struct pflow_v10_header), M_NOWAIT); if (m == NULL) { pflowstat_inc(pflow_onomem); return (ENOBUFS); } h10 = mtod(m, struct pflow_v10_header *); h10->version = htons(PFLOW_PROTO_10); h10->length = htons(PFLOW_IPFIX_HDRLEN + sizeof(struct pflow_ipfix_tmpl)); h10->time_sec = htonl(time_second); /* XXX 2038 */ h10->flow_sequence = htonl(sc->sc_sequence); h10->observation_dom = htonl(sc->sc_observation_dom); callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz, pflow_timeout_tmpl, sc); if (mbufq_enqueue(&sc->sc_outputqueue, m) == 0) swi_sched(sc->sc_swi_cookie, 0); return (0); } static int pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m) { if (sc->so == NULL) { m_freem(m); return (EINVAL); } return (sosend(sc->so, sc->sc_flowdst, NULL, m, NULL, 0, curthread)); } static int sysctl_pflowstats(SYSCTL_HANDLER_ARGS) { struct pflowstats pflowstats; pflowstats.pflow_flows = counter_u64_fetch(V_pflowstats.c[pflow_flows]); pflowstats.pflow_packets = counter_u64_fetch(V_pflowstats.c[pflow_packets]); pflowstats.pflow_onomem = counter_u64_fetch(V_pflowstats.c[pflow_onomem]); pflowstats.pflow_oerrors = counter_u64_fetch(V_pflowstats.c[pflow_oerrors]); return (sysctl_handle_opaque(oidp, &pflowstats, sizeof(pflowstats), req)); } static int pflow_nl_list(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct epoch_tracker et; struct pflow_softc *sc = NULL; struct nl_writer *nw = npt->nw; int error = 0; hdr->nlmsg_flags |= NLM_F_MULTI; NET_EPOCH_ENTER(et); CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) { if 
(!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { error = ENOMEM; goto out; } struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFLOWNL_CMD_LIST; ghdr_new->version = 0; ghdr_new->reserved = 0; nlattr_add_u32(nw, PFLOWNL_L_ID, sc->sc_id); if (! nlmsg_end(nw)) { error = ENOMEM; goto out; } } out: NET_EPOCH_EXIT(et); if (error != 0) nlmsg_abort(nw); return (error); } static int pflow_nl_create(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct nl_writer *nw = npt->nw; int error = 0; int unit; if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { return (ENOMEM); } struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = PFLOWNL_CMD_CREATE; ghdr_new->version = 0; ghdr_new->reserved = 0; unit = alloc_unr(V_pflow_unr); if (unit == -1) { nlmsg_abort(nw); return (ENOMEM); } error = pflow_create(unit); if (error != 0) { free_unr(V_pflow_unr, unit); nlmsg_abort(nw); return (error); } nlattr_add_s32(nw, PFLOWNL_CREATE_ID, unit); if (! 
nlmsg_end(nw)) { pflow_destroy(unit, true); return (ENOMEM); } return (0); } struct pflow_parsed_del { int id; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct pflow_parsed_del, _field) static const struct nlattr_parser nla_p_del[] = { { .type = PFLOWNL_DEL_ID, .off = _OUT(id), .cb = nlattr_get_uint32 }, }; static const struct nlfield_parser nlf_p_del[] = {}; #undef _IN #undef _OUT NL_DECLARE_PARSER(del_parser, struct genlmsghdr, nlf_p_del, nla_p_del); static int pflow_nl_del(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct pflow_parsed_del d = {}; int error; error = nl_parse_nlmsg(hdr, &del_parser, npt, &d); if (error != 0) return (error); error = pflow_destroy(d.id, true); return (error); } struct pflow_parsed_get { int id; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct pflow_parsed_get, _field) static const struct nlattr_parser nla_p_get[] = { { .type = PFLOWNL_GET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 }, }; static const struct nlfield_parser nlf_p_get[] = {}; #undef _IN #undef _OUT NL_DECLARE_PARSER(get_parser, struct genlmsghdr, nlf_p_get, nla_p_get); static bool nlattr_add_sockaddr(struct nl_writer *nw, int attr, const struct sockaddr *s) { int off = nlattr_add_nested(nw, attr); if (off == 0) return (false); nlattr_add_u8(nw, PFLOWNL_ADDR_FAMILY, s->sa_family); switch (s->sa_family) { case AF_INET: { const struct sockaddr_in *in = (const struct sockaddr_in *)s; nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in->sin_port); nlattr_add_in_addr(nw, PFLOWNL_ADDR_IP, &in->sin_addr); break; } case AF_INET6: { const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)s; nlattr_add_u16(nw, PFLOWNL_ADDR_PORT, in6->sin6_port); nlattr_add_in6_addr(nw, PFLOWNL_ADDR_IP6, &in6->sin6_addr); break; } default: panic("Unknown address family %d", s->sa_family); } nlattr_set_len(nw, off); return (true); } static int pflow_nl_get(struct nlmsghdr *hdr, struct nl_pstate *npt) { 
struct epoch_tracker et; struct pflow_parsed_get g = {}; struct pflow_softc *sc = NULL; struct nl_writer *nw = npt->nw; struct genlmsghdr *ghdr_new; int error; error = nl_parse_nlmsg(hdr, &get_parser, npt, &g); if (error != 0) return (error); NET_EPOCH_ENTER(et); CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) { if (sc->sc_id == g.id) break; } if (sc == NULL) { error = ENOENT; goto out; } if (! nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { nlmsg_abort(nw); error = ENOMEM; goto out; } ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); if (ghdr_new == NULL) { nlmsg_abort(nw); error = ENOMEM; goto out; } ghdr_new->cmd = PFLOWNL_CMD_GET; ghdr_new->version = 0; ghdr_new->reserved = 0; nlattr_add_u32(nw, PFLOWNL_GET_ID, sc->sc_id); nlattr_add_u16(nw, PFLOWNL_GET_VERSION, sc->sc_version); if (sc->sc_flowsrc) nlattr_add_sockaddr(nw, PFLOWNL_GET_SRC, sc->sc_flowsrc); if (sc->sc_flowdst) nlattr_add_sockaddr(nw, PFLOWNL_GET_DST, sc->sc_flowdst); nlattr_add_u32(nw, PFLOWNL_GET_OBSERVATION_DOMAIN, sc->sc_observation_dom); nlattr_add_u8(nw, PFLOWNL_GET_SOCKET_STATUS, sc->so != NULL); if (! 
nlmsg_end(nw)) { nlmsg_abort(nw); error = ENOMEM; } out: NET_EPOCH_EXIT(et); return (error); } struct pflow_sockaddr { union { struct sockaddr_in in; struct sockaddr_in6 in6; struct sockaddr_storage storage; }; }; static bool pflow_postparse_sockaddr(void *parsed_args, struct nl_pstate *npt __unused) { struct pflow_sockaddr *s = (struct pflow_sockaddr *)parsed_args; if (s->storage.ss_family == AF_INET) s->storage.ss_len = sizeof(struct sockaddr_in); else if (s->storage.ss_family == AF_INET6) s->storage.ss_len = sizeof(struct sockaddr_in6); else return (false); return (true); } #define _OUT(_field) offsetof(struct pflow_sockaddr, _field) static struct nlattr_parser nla_p_sockaddr[] = { { .type = PFLOWNL_ADDR_FAMILY, .off = _OUT(in.sin_family), .cb = nlattr_get_uint8 }, { .type = PFLOWNL_ADDR_PORT, .off = _OUT(in.sin_port), .cb = nlattr_get_uint16 }, { .type = PFLOWNL_ADDR_IP, .off = _OUT(in.sin_addr), .cb = nlattr_get_in_addr }, { .type = PFLOWNL_ADDR_IP6, .off = _OUT(in6.sin6_addr), .cb = nlattr_get_in6_addr }, }; NL_DECLARE_ATTR_PARSER_EXT(addr_parser, nla_p_sockaddr, pflow_postparse_sockaddr); #undef _OUT struct pflow_parsed_set { int id; uint16_t version; struct sockaddr_storage src; struct sockaddr_storage dst; uint32_t observation_dom; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct pflow_parsed_set, _field) static const struct nlattr_parser nla_p_set[] = { { .type = PFLOWNL_SET_ID, .off = _OUT(id), .cb = nlattr_get_uint32 }, { .type = PFLOWNL_SET_VERSION, .off = _OUT(version), .cb = nlattr_get_uint16 }, { .type = PFLOWNL_SET_SRC, .off = _OUT(src), .arg = &addr_parser, .cb = nlattr_get_nested }, { .type = PFLOWNL_SET_DST, .off = _OUT(dst), .arg = &addr_parser, .cb = nlattr_get_nested }, { .type = PFLOWNL_SET_OBSERVATION_DOMAIN, .off = _OUT(observation_dom), .cb = nlattr_get_uint32 }, }; static const struct nlfield_parser nlf_p_set[] = {}; #undef _IN #undef _OUT NL_DECLARE_PARSER(set_parser, struct genlmsghdr, 
nlf_p_set, nla_p_set); static int pflow_set(struct pflow_softc *sc, const struct pflow_parsed_set *pflowr, struct ucred *cred) { struct thread *td; struct socket *so; int error = 0; td = curthread; PFLOW_ASSERT(sc); if (pflowr->version != 0) { switch(pflowr->version) { case PFLOW_PROTO_5: case PFLOW_PROTO_10: break; default: return(EINVAL); } } pflow_flush(sc); if (pflowr->dst.ss_len != 0) { if (sc->sc_flowdst != NULL && sc->sc_flowdst->sa_family != pflowr->dst.ss_family) { free(sc->sc_flowdst, M_DEVBUF); sc->sc_flowdst = NULL; if (sc->so != NULL) { soclose(sc->so); sc->so = NULL; } } switch (pflowr->dst.ss_family) { case AF_INET: if (sc->sc_flowdst == NULL) { if ((sc->sc_flowdst = malloc( sizeof(struct sockaddr_in), M_DEVBUF, M_NOWAIT)) == NULL) return (ENOMEM); } memcpy(sc->sc_flowdst, &pflowr->dst, sizeof(struct sockaddr_in)); sc->sc_flowdst->sa_len = sizeof(struct sockaddr_in); break; case AF_INET6: if (sc->sc_flowdst == NULL) { if ((sc->sc_flowdst = malloc( sizeof(struct sockaddr_in6), M_DEVBUF, M_NOWAIT)) == NULL) return (ENOMEM); } memcpy(sc->sc_flowdst, &pflowr->dst, sizeof(struct sockaddr_in6)); sc->sc_flowdst->sa_len = sizeof(struct sockaddr_in6); break; default: break; } } if (pflowr->src.ss_len != 0) { if (sc->sc_flowsrc != NULL) free(sc->sc_flowsrc, M_DEVBUF); sc->sc_flowsrc = NULL; if (sc->so != NULL) { soclose(sc->so); sc->so = NULL; } switch(pflowr->src.ss_family) { case AF_INET: if ((sc->sc_flowsrc = malloc( sizeof(struct sockaddr_in), M_DEVBUF, M_NOWAIT)) == NULL) return (ENOMEM); memcpy(sc->sc_flowsrc, &pflowr->src, sizeof(struct sockaddr_in)); sc->sc_flowsrc->sa_len = sizeof(struct sockaddr_in); break; case AF_INET6: if ((sc->sc_flowsrc = malloc( sizeof(struct sockaddr_in6), M_DEVBUF, M_NOWAIT)) == NULL) return (ENOMEM); memcpy(sc->sc_flowsrc, &pflowr->src, sizeof(struct sockaddr_in6)); sc->sc_flowsrc->sa_len = sizeof(struct sockaddr_in6); break; default: break; } } if (sc->so == NULL) { if (pflowvalidsockaddr(sc->sc_flowdst, 0)) { error = 
socreate(sc->sc_flowdst->sa_family, &so, SOCK_DGRAM, IPPROTO_UDP, cred, td); if (error) return (error); if (pflowvalidsockaddr(sc->sc_flowsrc, 1)) { error = sobind(so, sc->sc_flowsrc, td); if (error) { soclose(so); return (error); } } sc->so = so; } } else if (!pflowvalidsockaddr(sc->sc_flowdst, 0)) { soclose(sc->so); sc->so = NULL; } if (pflowr->observation_dom != 0) sc->sc_observation_dom = pflowr->observation_dom; /* error check is above */ if (pflowr->version != 0) sc->sc_version = pflowr->version; pflow_setmtu(sc, ETHERMTU); switch (sc->sc_version) { case PFLOW_PROTO_5: callout_stop(&sc->sc_tmo6); callout_stop(&sc->sc_tmo_tmpl); break; case PFLOW_PROTO_10: callout_reset(&sc->sc_tmo_tmpl, PFLOW_TMPL_TIMEOUT * hz, pflow_timeout_tmpl, sc); break; default: /* NOTREACHED */ break; } return (0); } static int pflow_nl_set(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct epoch_tracker et; struct pflow_parsed_set s = {}; struct pflow_softc *sc = NULL; int error; error = nl_parse_nlmsg(hdr, &set_parser, npt, &s); if (error != 0) return (error); NET_EPOCH_ENTER(et); CK_LIST_FOREACH(sc, &V_pflowif_list, sc_next) { if (sc->sc_id == s.id) break; } if (sc == NULL) { error = ENOENT; goto out; } PFLOW_LOCK(sc); error = pflow_set(sc, &s, nlp_get_cred(npt->nlp)); PFLOW_UNLOCK(sc); out: NET_EPOCH_EXIT(et); return (error); } static const struct genl_cmd pflow_cmds[] = { { .cmd_num = PFLOWNL_CMD_LIST, .cmd_name = "LIST", .cmd_cb = pflow_nl_list, .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, .cmd_priv = PRIV_NETINET_PF, }, { .cmd_num = PFLOWNL_CMD_CREATE, .cmd_name = "CREATE", .cmd_cb = pflow_nl_create, .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, .cmd_priv = PRIV_NETINET_PF, }, { .cmd_num = PFLOWNL_CMD_DEL, .cmd_name = "DEL", .cmd_cb = pflow_nl_del, .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, .cmd_priv = PRIV_NETINET_PF, }, { .cmd_num = PFLOWNL_CMD_GET, .cmd_name = "GET", .cmd_cb = pflow_nl_get, 
.cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, .cmd_priv = PRIV_NETINET_PF, }, { .cmd_num = PFLOWNL_CMD_SET, .cmd_name = "SET", .cmd_cb = pflow_nl_set, .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, .cmd_priv = PRIV_NETINET_PF, }, }; static const struct nlhdr_parser *all_parsers[] = { &del_parser, &get_parser, &set_parser, }; static unsigned pflow_do_osd_jail_slot; static int pflow_init(void) { bool ret; int family_id __diagused; NL_VERIFY_PARSERS(all_parsers); static osd_method_t methods[PR_MAXMETHOD] = { [PR_METHOD_REMOVE] = pflow_jail_remove, }; pflow_do_osd_jail_slot = osd_jail_register(NULL, methods); family_id = genl_register_family(PFLOWNL_FAMILY_NAME, 0, 2, PFLOWNL_CMD_MAX); MPASS(family_id != 0); - ret = genl_register_cmds(PFLOWNL_FAMILY_NAME, pflow_cmds, NL_ARRAY_LEN(pflow_cmds)); + ret = genl_register_cmds(PFLOWNL_FAMILY_NAME, pflow_cmds, + nitems(pflow_cmds)); return (ret ? 0 : ENODEV); } static void pflow_uninit(void) { osd_jail_deregister(pflow_do_osd_jail_slot); genl_unregister_family(PFLOWNL_FAMILY_NAME); } static int pflow_modevent(module_t mod, int type, void *data) { int error = 0; switch (type) { case MOD_LOAD: error = pflow_init(); break; case MOD_UNLOAD: pflow_uninit(); break; default: error = EINVAL; break; } return (error); } static moduledata_t pflow_mod = { pflowname, pflow_modevent, 0 }; DECLARE_MODULE(pflow, pflow_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); MODULE_VERSION(pflow, 1); MODULE_DEPEND(pflow, pf, PF_MODVER, PF_MODVER, PF_MODVER); diff --git a/sys/tests/ktest.c b/sys/tests/ktest.c index cd83a6aaaa3f..694e1f4229b5 100644 --- a/sys/tests/ktest.c +++ b/sys/tests/ktest.c @@ -1,413 +1,414 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2023 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct mtx ktest_mtx; #define KTEST_LOCK() mtx_lock(&ktest_mtx) #define KTEST_UNLOCK() mtx_unlock(&ktest_mtx) #define KTEST_LOCK_ASSERT() mtx_assert(&ktest_mtx, MA_OWNED) MTX_SYSINIT(ktest_mtx, &ktest_mtx, "ktest mutex", MTX_DEF); struct ktest_module { struct ktest_module_info *info; volatile u_int refcount; TAILQ_ENTRY(ktest_module) entries; }; static TAILQ_HEAD(, ktest_module) module_list = TAILQ_HEAD_INITIALIZER(module_list); struct nl_ktest_parsed { char *mod_name; char *test_name; struct nlattr *test_meta; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct nl_ktest_parsed, _field) static const struct nlattr_parser nla_p_get[] = { { .type = KTEST_ATTR_MOD_NAME, .off = _OUT(mod_name), .cb = nlattr_get_string }, { .type = KTEST_ATTR_TEST_NAME, .off = _OUT(test_name), .cb = nlattr_get_string }, { .type = KTEST_ATTR_TEST_META, .off = _OUT(test_meta), .cb = nlattr_get_nla }, }; static const struct nlfield_parser nlf_p_get[] = { }; NL_DECLARE_PARSER(ktest_parser, struct genlmsghdr, nlf_p_get, nla_p_get); #undef _IN #undef _OUT static bool create_reply(struct nl_writer *nw, struct nlmsghdr *hdr, int cmd) { if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) return (false); struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = cmd; ghdr_new->version = 0; ghdr_new->reserved = 0; return (true); } static int dump_mod_test(struct nlmsghdr *hdr, struct nl_pstate *npt, struct ktest_module *mod, const struct ktest_test_info *test_info) { struct nl_writer *nw = npt->nw; if (!create_reply(nw, hdr, KTEST_CMD_NEWTEST)) goto enomem; nlattr_add_string(nw, KTEST_ATTR_MOD_NAME, mod->info->name); nlattr_add_string(nw, KTEST_ATTR_TEST_NAME, test_info->name); nlattr_add_string(nw, KTEST_ATTR_TEST_DESCR, test_info->desc); if (nlmsg_end(nw)) return (0); enomem: 
nlmsg_abort(nw); return (ENOMEM); } static int dump_mod_tests(struct nlmsghdr *hdr, struct nl_pstate *npt, struct ktest_module *mod, struct nl_ktest_parsed *attrs) { for (int i = 0; i < mod->info->num_tests; i++) { const struct ktest_test_info *test_info = &mod->info->tests[i]; if (attrs->test_name != NULL && strcmp(attrs->test_name, test_info->name)) continue; int error = dump_mod_test(hdr, npt, mod, test_info); if (error != 0) return (error); } return (0); } static int dump_tests(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct nl_ktest_parsed attrs = { }; struct ktest_module *mod; int error; error = nl_parse_nlmsg(hdr, &ktest_parser, npt, &attrs); if (error != 0) return (error); hdr->nlmsg_flags |= NLM_F_MULTI; KTEST_LOCK(); TAILQ_FOREACH(mod, &module_list, entries) { if (attrs.mod_name && strcmp(attrs.mod_name, mod->info->name)) continue; error = dump_mod_tests(hdr, npt, mod, &attrs); if (error != 0) break; } KTEST_UNLOCK(); if (!nlmsg_end_dump(npt->nw, error, hdr)) { //NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (error); } static int run_test(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct nl_ktest_parsed attrs = { }; struct ktest_module *mod; int error; error = nl_parse_nlmsg(hdr, &ktest_parser, npt, &attrs); if (error != 0) return (error); if (attrs.mod_name == NULL) { nlmsg_report_err_msg(npt, "KTEST_ATTR_MOD_NAME not set"); return (EINVAL); } if (attrs.test_name == NULL) { nlmsg_report_err_msg(npt, "KTEST_ATTR_TEST_NAME not set"); return (EINVAL); } const struct ktest_test_info *test = NULL; KTEST_LOCK(); TAILQ_FOREACH(mod, &module_list, entries) { if (strcmp(attrs.mod_name, mod->info->name)) continue; const struct ktest_module_info *info = mod->info; for (int i = 0; i < info->num_tests; i++) { const struct ktest_test_info *test_info = &info->tests[i]; if (!strcmp(attrs.test_name, test_info->name)) { test = test_info; break; } } break; } if (test != NULL) refcount_acquire(&mod->refcount); KTEST_UNLOCK(); if (test 
== NULL) return (ESRCH); /* Run the test */ struct ktest_test_context ctx = { .npt = npt, .hdr = hdr, .buf = npt_alloc(npt, KTEST_MAX_BUF), .bufsize = KTEST_MAX_BUF, }; if (ctx.buf == NULL) { //NL_LOG(LOG_DEBUG, "unable to allocate temporary buffer"); return (ENOMEM); } if (test->parse != NULL && attrs.test_meta != NULL) { error = test->parse(&ctx, attrs.test_meta); if (error != 0) return (error); } hdr->nlmsg_flags |= NLM_F_MULTI; KTEST_LOG_LEVEL(&ctx, LOG_INFO, "start running %s", test->name); error = test->func(&ctx); KTEST_LOG_LEVEL(&ctx, LOG_INFO, "end running %s", test->name); refcount_release(&mod->refcount); if (!nlmsg_end_dump(npt->nw, error, hdr)) { //NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (error); } /* USER API */ static void register_test_module(struct ktest_module_info *info) { struct ktest_module *mod = malloc(sizeof(*mod), M_TEMP, M_WAITOK | M_ZERO); mod->info = info; info->module_ptr = mod; KTEST_LOCK(); TAILQ_INSERT_TAIL(&module_list, mod, entries); KTEST_UNLOCK(); } static void unregister_test_module(struct ktest_module_info *info) { struct ktest_module *mod = info->module_ptr; info->module_ptr = NULL; KTEST_LOCK(); TAILQ_REMOVE(&module_list, mod, entries); KTEST_UNLOCK(); free(mod, M_TEMP); } static bool can_unregister(struct ktest_module_info *info) { struct ktest_module *mod = info->module_ptr; return (refcount_load(&mod->refcount) == 0); } int ktest_default_modevent(module_t mod, int type, void *arg) { struct ktest_module_info *info = (struct ktest_module_info *)arg; int error = 0; switch (type) { case MOD_LOAD: register_test_module(info); break; case MOD_UNLOAD: if (!can_unregister(info)) return (EBUSY); unregister_test_module(info); break; default: error = EOPNOTSUPP; break; } return (error); } bool ktest_start_msg(struct ktest_test_context *ctx) { return (create_reply(ctx->npt->nw, ctx->hdr, KTEST_CMD_NEWMESSAGE)); } void ktest_add_msg_meta(struct ktest_test_context *ctx, const char *func, const char 
*fname, int line) { struct nl_writer *nw = ctx->npt->nw; struct timespec ts; nanouptime(&ts); nlattr_add(nw, KTEST_MSG_ATTR_TS, sizeof(ts), &ts); nlattr_add_string(nw, KTEST_MSG_ATTR_FUNC, func); nlattr_add_string(nw, KTEST_MSG_ATTR_FILE, fname); nlattr_add_u32(nw, KTEST_MSG_ATTR_LINE, line); } void ktest_add_msg_text(struct ktest_test_context *ctx, int msg_level, const char *fmt, ...) { va_list ap; va_start(ap, fmt); vsnprintf(ctx->buf, ctx->bufsize, fmt, ap); va_end(ap); nlattr_add_u8(ctx->npt->nw, KTEST_MSG_ATTR_LEVEL, msg_level); nlattr_add_string(ctx->npt->nw, KTEST_MSG_ATTR_TEXT, ctx->buf); } void ktest_end_msg(struct ktest_test_context *ctx) { nlmsg_end(ctx->npt->nw); } /* Module glue */ static const struct nlhdr_parser *all_parsers[] = { &ktest_parser }; static const struct genl_cmd ktest_cmds[] = { { .cmd_num = KTEST_CMD_LIST, .cmd_name = "KTEST_CMD_LIST", .cmd_cb = dump_tests, .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, }, { .cmd_num = KTEST_CMD_RUN, .cmd_name = "KTEST_CMD_RUN", .cmd_cb = run_test, .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, .cmd_priv = PRIV_KLD_LOAD, }, }; static void ktest_nl_register(void) { bool ret __diagused; int family_id __diagused; NL_VERIFY_PARSERS(all_parsers); family_id = genl_register_family(KTEST_FAMILY_NAME, 0, 1, KTEST_CMD_MAX); MPASS(family_id != 0); - ret = genl_register_cmds(KTEST_FAMILY_NAME, ktest_cmds, NL_ARRAY_LEN(ktest_cmds)); + ret = genl_register_cmds(KTEST_FAMILY_NAME, ktest_cmds, + nitems(ktest_cmds)); MPASS(ret); } static void ktest_nl_unregister(void) { MPASS(TAILQ_EMPTY(&module_list)); genl_unregister_family(KTEST_FAMILY_NAME); } static int ktest_modevent(module_t mod, int type, void *unused) { int error = 0; switch (type) { case MOD_LOAD: ktest_nl_register(); break; case MOD_UNLOAD: ktest_nl_unregister(); break; default: error = EOPNOTSUPP; break; } return (error); } static moduledata_t ktestmod = { "ktest", ktest_modevent, 0 }; DECLARE_MODULE(ktestmod, ktestmod, 
SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(ktestmod, 1); MODULE_DEPEND(ktestmod, netlink, 1, 1, 1);