Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -4336,7 +4336,9 @@ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_eq.c optional mlx5 pci \ compile-with "${OFED_C}" -dev/mlx5/mlx5_core/mlx5_flow_table.c optional mlx5 pci \ +dev/mlx5/mlx5_core/mlx5_fs_cmd.c optional mlx5 pci \ + compile-with "${OFED_C}" +dev/mlx5/mlx5_core/mlx5_fs_tree.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_fw.c optional mlx5 pci \ compile-with "${OFED_C}" Index: sys/dev/mlx5/device.h =================================================================== --- sys/dev/mlx5/device.h +++ sys/dev/mlx5/device.h @@ -399,7 +399,6 @@ __be32 ieee1588_clk_type; __be32 clr_intx; }; - struct mlx5_eqe_comp { __be32 reserved[6]; __be32 cqn; Index: sys/dev/mlx5/driver.h =================================================================== --- sys/dev/mlx5/driver.h +++ sys/dev/mlx5/driver.h @@ -504,6 +504,8 @@ char name[MLX5_MAX_IRQ_NAME]; }; +struct mlx5_eswitch; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -556,6 +558,7 @@ struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; + struct mlx5_eswitch *eswitch; unsigned long pci_dev_data; }; @@ -568,8 +571,10 @@ int resd_lkey; }; +struct mlx5_flow_root_namespace; struct mlx5_core_dev { struct pci_dev *pdev; + u8 rev_id; char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; @@ -586,6 +591,12 @@ u32 issi; struct mlx5_special_contexts special_contexts; unsigned int module_status[MLX5_MAX_PORTS]; + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_root_namespace *esw_egress_root_ns; + struct mlx5_flow_root_namespace *esw_ingress_root_ns; + struct mlx5_flow_root_namespace *sniffer_rx_root_ns; + struct mlx5_flow_root_namespace *sniffer_tx_root_ns; u32 num_q_counter_allocated[MLX5_INTERFACE_NUMBER]; 
}; Index: sys/dev/mlx5/fs.h =================================================================== --- /dev/null +++ sys/dev/mlx5/fs.h @@ -0,0 +1,223 @@ +/*- + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _MLX5_FS_ +#define _MLX5_FS_ + +#include + +#include +#include +#include + +/* Flow tag values */ +enum { + MLX5_FS_DEFAULT_FLOW_TAG = 0xFFFFFF, + MLX5_FS_ETH_FLOW_TAG = 0xFFFFFE, + MLX5_FS_SNIFFER_FLOW_TAG = 0xFFFFFD, +}; + +enum { + MLX5_FS_FLOW_TAG_MASK = 0xFFFFFF, +}; + +#define FS_MAX_TYPES 10 +#define FS_MAX_ENTRIES 32000U + +enum mlx5_flow_namespace_type { + MLX5_FLOW_NAMESPACE_BYPASS, + MLX5_FLOW_NAMESPACE_KERNEL, + MLX5_FLOW_NAMESPACE_LEFTOVERS, + MLX5_FLOW_NAMESPACE_SNIFFER_RX, + MLX5_FLOW_NAMESPACE_SNIFFER_TX, + MLX5_FLOW_NAMESPACE_FDB, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, +}; + +struct mlx5_flow_table; +struct mlx5_flow_group; +struct mlx5_flow_rule; +struct mlx5_flow_namespace; + + +struct mlx5_flow_destination { + u32 type; + union { + u32 tir_num; + struct mlx5_flow_table *ft; + u32 vport_num; + }; +}; +/* Parameters of the "leftovers" flow table: build_leftovers_ft_param() writes the name (at most FT_NAME_STR_SZ bytes including the terminating NUL), priority 0, LEFTOVERS_RULE_NUM + 1 entries and LEFTOVERS_RULE_NUM groups. */ +#define FT_NAME_STR_SZ 20 +#define LEFTOVERS_RULE_NUM 2 +static inline void build_leftovers_ft_param(char *name, + unsigned int *priority, + int *n_ent, + int *n_grp) +{ + snprintf(name, FT_NAME_STR_SZ, "leftovers"); + *priority = 0; /*Priority of leftovers_prio-0*/ + *n_ent = LEFTOVERS_RULE_NUM + 1; /*1: star rules*/ + *n_grp = LEFTOVERS_RULE_NUM; +} +/* Return true when every byte of the outer_headers section of a fte_match_param is zero. Only the outer_headers bytes are examined, so non-zero misc or inner header criteria do not affect the result. NOTE(review): assumes outer_headers is laid out as fte_match_set_lyr_2_4 in mlx5_ifc.h - confirm. */ +static inline bool outer_header_zero(u32 *match_criteria) +{ + int size = MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + + /* first byte is zero and every byte equals its successor => all bytes are zero */ + return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, + outer_headers_c + 1, + size - 1); +} + +struct mlx5_flow_namespace * +mlx5_get_flow_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type); + +/* The underlying implementation creates two more entries for + * chaining flow tables. 
The caller should be aware that passing a table size + * (num_flow_table_entries) of 2^N will result in a flow table of doubled size + */ +struct mlx5_flow_table * +mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + const char *name, + int num_flow_table_entries, + int max_num_groups); + +struct mlx5_flow_table * +mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, + u16 vport, + int prio, + const char *name, + int num_flow_table_entries); + +struct mlx5_flow_table * +mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + int prio, + const char *name, + int num_flow_table_entries); +int mlx5_destroy_flow_table(struct mlx5_flow_table *ft); + +/* The input buffer 'in' (inbox) should be set with the following values: + * start_flow_index + * end_flow_index + * match_criteria_enable + * match_criteria + */ +struct mlx5_flow_group * +mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in); +void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); + +/* Single destination per rule. + * Group ID is implied by the match criteria. 
+ */ +struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest); +void mlx5_del_flow_rule(struct mlx5_flow_rule *fr); + +/*The following API is for sniffer*/ +typedef int (*rule_event_fn)(struct mlx5_flow_rule *rule, + bool ctx_changed, + void *client_data, + void *context); + +struct mlx5_flow_handler; + +struct flow_client_priv_data; + +void mlx5e_sniffer_roce_mode_notify( + struct mlx5_core_dev *mdev, + int action); + +int mlx5_set_rule_private_data(struct mlx5_flow_rule *rule, struct + mlx5_flow_handler *handler, void + *client_data); + +struct mlx5_flow_handler *mlx5_register_rule_notifier(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type ns_type, + rule_event_fn add_cb, + rule_event_fn del_cb, + void *context); + +void mlx5_unregister_rule_notifier(struct mlx5_flow_handler *handler); + +void mlx5_flow_iterate_existing_rules(struct mlx5_flow_namespace *ns, + rule_event_fn cb, + void *context); + +void mlx5_get_match_criteria(u32 *match_criteria, + struct mlx5_flow_rule *rule); + +void mlx5_get_match_value(u32 *match_value, + struct mlx5_flow_rule *rule); + +u8 mlx5_get_match_criteria_enable(struct mlx5_flow_rule *rule); + +struct mlx5_flow_rules_list *get_roce_flow_rules(u8 roce_mode); + +void mlx5_del_flow_rules_list(struct mlx5_flow_rules_list *rules_list); + +struct mlx5_flow_rules_list { + struct list_head head; +}; + +struct mlx5_flow_rule_node { + struct list_head list; + u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; + u32 match_value[MLX5_ST_SZ_DW(fte_match_param)]; + u8 match_criteria_enable; +}; + +struct mlx5_core_fs_mask { + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; +}; + +bool fs_match_exact_val( + struct mlx5_core_fs_mask *mask, + void *val1, + void *val2); + +bool fs_match_exact_mask( + u8 match_criteria_enable1, + u8 match_criteria_enable2, + 
void *mask1, + void *mask2); +/**********end API for sniffer**********/ + +#endif Index: sys/dev/mlx5/mlx5_core/eswitch.h =================================================================== --- /dev/null +++ sys/dev/mlx5/mlx5_core/eswitch.h @@ -0,0 +1,169 @@ +/*- + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef __MLX5_ESWITCH_H__ +#define __MLX5_ESWITCH_H__ + +#include +#include + +#define MLX5_MAX_UC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) + +#define MLX5_MAX_MC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) +/* The L2 hash has one bucket per possible value of the last MAC byte; macro arguments are parenthesized so that pointer expressions can be passed safely. */ +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) ((addr)[5]) + +/* L2 -mac address based- hash helpers */ +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; +/* Walk every node of every bucket of 'hash'; 'tmp' is the scratch cursor required by hlist_for_each_entry_safe(). */ +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for ((i) = 0; (i) < MLX5_L2_ADDR_HASH_SIZE; (i)++) \ + hlist_for_each_entry_safe(hn, tmp, &(hash)[(i)], hlist) +/* Look up 'mac' in 'hash'. Evaluates to a 'type *' (a struct that embeds a struct l2addr_node member named 'node'), or NULL when no entry has that address. */ +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &(hash)[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) +/* Allocate a zeroed 'type', copy 'mac' into its node and link it at the head of the matching bucket. Evaluates to the new entry, or NULL if the allocation failed. Does not check for duplicates. */ +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\ + } \ + ptr; \ +}) +/* Unlink 'ptr' from its bucket and free it. 'ptr' is evaluated twice, so pass an expression without side effects. */ +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&(ptr)->node.hlist); \ + kfree(ptr); \ +}) + +struct vport_ingress { + struct mlx5_flow_table *acl; + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_rule *drop_rule; +}; + +struct vport_egress { + struct mlx5_flow_table *acl; + struct mlx5_flow_group *allowed_vlans_grp; + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_rule *allowed_vlan; + struct mlx5_flow_rule *drop_rule; +}; + +struct mlx5_vport { + struct mlx5_core_dev *dev; + int vport; + struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct work_struct vport_change_handler; + + struct vport_ingress ingress; + struct vport_egress egress; + + u16 vlan; + u8 qos; + 
struct mutex state_lock; /* protect dynamic state changes */ + /* This spinlock protects access to vport data, between + * "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event" + * once vport marked as disabled new interrupts are discarded. + */ + spinlock_t lock; /* vport events sync */ + bool enabled; + u16 enabled_events; +}; + +struct mlx5_l2_table { + struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; + u32 size; + unsigned long *bitmap; +}; + +struct mlx5_eswitch_fdb { + void *fdb; + struct mlx5_flow_group *addr_grp; +}; + +struct mlx5_eswitch { + struct mlx5_core_dev *dev; + struct mlx5_l2_table l2_table; + struct mlx5_eswitch_fdb fdb_table; + struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct workqueue_struct *work_queue; + struct mlx5_vport *vports; + int total_vports; + int enabled_vports; +}; + +struct mlx5_esw_vport_info { + __u32 vf; + __u8 mac[32]; + __u32 vlan; + __u32 qos; + __u32 spoofchk; + __u32 linkstate; + __u32 min_tx_rate; + __u32 max_tx_rate; +}; + +/* E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev); +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]); +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state); +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos); +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct mlx5_esw_vport_info *evi); + +#endif /* __MLX5_ESWITCH_H__ */ Index: sys/dev/mlx5/mlx5_core/fs_core.h =================================================================== --- /dev/null +++ sys/dev/mlx5/mlx5_core/fs_core.h @@ -0,0 +1,300 @@ +/*- + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _MLX5_FS_CORE_ +#define _MLX5_FS_CORE_ + +#include +#include +#include +#include +/* Kind of flow-steering node; stored in fs_base.type and tested by the fs_for_each_*() iterators. */ +enum fs_type { + FS_TYPE_NAMESPACE, + FS_TYPE_PRIO, + FS_TYPE_FLOW_TABLE, + FS_TYPE_FLOW_GROUP, + FS_TYPE_FLOW_ENTRY, + FS_TYPE_FLOW_DEST +}; +/* Flow table type; note that the value 0x1 is not defined here. */ +enum fs_ft_type { + FS_FT_NIC_RX = 0x0, + FS_FT_ESW_EGRESS_ACL = 0x2, + FS_FT_ESW_INGRESS_ACL = 0x3, + FS_FT_FDB = 0X4, + FS_FT_SNIFFER_RX = 0x5, + FS_FT_SNIFFER_TX = 0x6 +}; + +enum fs_fte_status { + FS_FTE_STATUS_EXISTING = 1UL << 0, +}; + +/* Common header of every flow-steering node. Should always be the first member of the struct that embeds it. */ +struct fs_base { + struct list_head list; + struct fs_base *parent; + enum fs_type type; + struct kref refcount; + /* lock the node for writing and traversing */ + struct mutex lock; + struct completion complete; + atomic_t users_refcount; + const char *name; +}; +/* A rule node: one destination plus a list of per-client private data. */ +struct mlx5_flow_rule { + struct fs_base base; + struct mlx5_flow_destination dest_attr; + struct list_head clients_data; + /* protects the clients list */ + struct mutex clients_lock; +}; +/* A flow table entry: match value, flow tag, action and its list of destinations. */ +struct fs_fte { + struct fs_base base; + u32 val[MLX5_ST_SZ_DW(fte_match_param)]; + uint32_t dests_size; + uint32_t flow_tag; + struct list_head dests; + uint32_t index; /* index in ft */ + u8 action; /* MLX5_FLOW_CONTEXT_ACTION */ + enum fs_fte_status status; +}; + +struct fs_star_rule { + struct mlx5_flow_group *fg; + struct fs_fte *fte; +}; + +struct mlx5_flow_table { + struct fs_base base; + /* sorted list by start_index */ + struct list_head fgs; + struct { + bool active; + unsigned int max_types; + unsigned int num_types; + } autogroup; + unsigned int max_fte; + unsigned int level; + uint32_t id; + u16 vport; + enum fs_ft_type type; + struct fs_star_rule star_rule; + unsigned int shared_refcount; +}; + +enum fs_prio_flags { + MLX5_CORE_FS_PRIO_SHARED = 1 +}; + +struct fs_prio { + struct fs_base base; + struct list_head objs; /* each object is a namespace or ft */ + unsigned int max_ft; + unsigned int num_ft; + unsigned int max_ns; + unsigned int prio; + /*When 
create shared flow table, this lock should be taken*/ + struct mutex shared_lock; + u8 flags; +}; + +struct mlx5_flow_namespace { + /* parent == NULL => root ns */ + struct fs_base base; + /* sorted by priority number */ + struct list_head prios; /* list of fs_prios */ + struct list_head list_notifiers; + struct rw_semaphore notifiers_rw_sem; + struct rw_semaphore dests_rw_sem; +}; +/* A namespace together with its device, its table type and pointers to its root and level-0 flow tables. */ +struct mlx5_flow_root_namespace { + struct mlx5_flow_namespace ns; + struct mlx5_flow_table *ft_level_0; + enum fs_ft_type table_type; + struct mlx5_core_dev *dev; + struct mlx5_flow_table *root_ft; + /* When chaining flow-tables, this lock should be taken */ + struct mutex fs_chain_lock; +}; +/* A flow group: a match mask and a range of max_ftes entries starting at start_index; its entries are linked on 'ftes'. */ +struct mlx5_flow_group { + struct fs_base base; + struct list_head ftes; + struct mlx5_core_fs_mask mask; + uint32_t start_index; + uint32_t max_ftes; + uint32_t num_ftes; + uint32_t id; +}; +/* Rule notifier handle (the type returned by mlx5_register_rule_notifier() in fs.h): add/delete callbacks, a client context pointer and a namespace pointer. */ +struct mlx5_flow_handler { + struct list_head list; + rule_event_fn add_dst_cb; + rule_event_fn del_dst_cb; + void *client_context; + struct mlx5_flow_namespace *ns; +}; + +struct fs_client_priv_data { + struct mlx5_flow_handler *fs_handler; + struct list_head list; + void *client_dst_data; +}; +/* NOTE(review): fs_get_obj() and fs_get_parent() below expand to a bare '{ ... }' block, so 'if (x) fs_get_obj(a, b); else ...' does not compile; use them only as standalone statements. */ +void _fs_remove_node(struct kref *kref); +#define fs_get_obj(v, _base) {v = container_of((_base), typeof(*v), base); } +#define fs_get_parent(v, child) {v = (child)->base.parent ? 
\ + container_of((child)->base.parent, \ + typeof(*v), base) : NULL; } +/* Filtered list iteration: walk 'root' through each entry's base.list and run the loop body only for entries for which 'cond' holds. The trailing 'if (!(cond)) {} else' makes the statement that follows the macro the else-branch. */ +#define fs_list_for_each_entry(pos, cond, root) \ + list_for_each_entry(pos, root, base.list) \ + if (!(cond)) {} else + +#define fs_list_for_each_entry_continue(pos, cond, root) \ + list_for_each_entry_continue(pos, root, base.list) \ + if (!(cond)) {} else + +#define fs_list_for_each_entry_reverse(pos, cond, root) \ + list_for_each_entry_reverse(pos, root, base.list) \ + if (!(cond)) {} else + +#define fs_list_for_each_entry_continue_reverse(pos, cond, root) \ + list_for_each_entry_continue_reverse(pos, root, base.list) \ + if (!(cond)) {} else +/* Typed iterators over a prio's 'objs' list; 'pos' embeds a 'base' member and entries are filtered on base.type. */ +#define fs_for_each_ft(pos, prio) \ + fs_list_for_each_entry(pos, (pos)->base.type == FS_TYPE_FLOW_TABLE, \ + &(prio)->objs) + +#define fs_for_each_ft_reverse(pos, prio) \ + fs_list_for_each_entry_reverse(pos, \ + (pos)->base.type == FS_TYPE_FLOW_TABLE, \ + &(prio)->objs) + +#define fs_for_each_ns(pos, prio) \ + fs_list_for_each_entry(pos, \ + (pos)->base.type == FS_TYPE_NAMESPACE, \ + &(prio)->objs) +/* The ns_or_ft variants link through 'list' and test '(pos)->type' directly, so 'pos' is used without going through a 'base' member. */ +#define fs_for_each_ns_or_ft_reverse(pos, prio) \ + list_for_each_entry_reverse(pos, &(prio)->objs, list) \ + if (!((pos)->type == FS_TYPE_NAMESPACE || \ + (pos)->type == FS_TYPE_FLOW_TABLE)) {} else + +#define fs_for_each_ns_or_ft(pos, prio) \ + list_for_each_entry(pos, &(prio)->objs, list) \ + if (!((pos)->type == FS_TYPE_NAMESPACE || \ + (pos)->type == FS_TYPE_FLOW_TABLE)) {} else + +#define fs_for_each_ns_or_ft_continue_reverse(pos, prio) \ + list_for_each_entry_continue_reverse(pos, &(prio)->objs, list) \ + if (!((pos)->type == FS_TYPE_NAMESPACE || \ + (pos)->type == FS_TYPE_FLOW_TABLE)) {} else + +#define fs_for_each_ns_or_ft_continue(pos, prio) \ + list_for_each_entry_continue(pos, &(prio)->objs, list) \ + if (!((pos)->type == FS_TYPE_NAMESPACE || \ + (pos)->type == FS_TYPE_FLOW_TABLE)) {} else +/* Iterators over a namespace's 'prios' list, filtered on base.type == FS_TYPE_PRIO. */ +#define fs_for_each_prio(pos, ns) \ + fs_list_for_each_entry(pos, (pos)->base.type == FS_TYPE_PRIO, \ + &(ns)->prios) + +#define 
fs_for_each_prio_reverse(pos, ns) \ + fs_list_for_each_entry_reverse(pos, (pos)->base.type == FS_TYPE_PRIO, \ + &(ns)->prios) + +#define fs_for_each_prio_continue(pos, ns) \ + fs_list_for_each_entry_continue(pos, (pos)->base.type == FS_TYPE_PRIO, \ + &(ns)->prios) + +#define fs_for_each_prio_continue_reverse(pos, ns) \ + fs_list_for_each_entry_continue_reverse(pos, \ + (pos)->base.type == FS_TYPE_PRIO, \ + &(ns)->prios) +/* Iterators over a table's groups, a group's entries and an entry's destinations, each filtered on the matching base.type. */ +#define fs_for_each_fg(pos, ft) \ + fs_list_for_each_entry(pos, (pos)->base.type == FS_TYPE_FLOW_GROUP, \ + &(ft)->fgs) + +#define fs_for_each_fte(pos, fg) \ + fs_list_for_each_entry(pos, (pos)->base.type == FS_TYPE_FLOW_ENTRY, \ + &(fg)->ftes) +#define fs_for_each_dst(pos, fte) \ + fs_list_for_each_entry(pos, (pos)->base.type == FS_TYPE_FLOW_DEST, \ + &(fte)->dests) + +int mlx5_cmd_fs_create_ft(struct mlx5_core_dev *dev, + u16 vport, + enum fs_ft_type type, unsigned int level, + unsigned int log_size, unsigned int *table_id); + +int mlx5_cmd_fs_destroy_ft(struct mlx5_core_dev *dev, + u16 vport, + enum fs_ft_type type, unsigned int table_id); + +int mlx5_cmd_fs_create_fg(struct mlx5_core_dev *dev, + u32 *in, + u16 vport, + enum fs_ft_type type, unsigned int table_id, + unsigned int *group_id); + +int mlx5_cmd_fs_destroy_fg(struct mlx5_core_dev *dev, + u16 vport, + enum fs_ft_type type, unsigned int table_id, + unsigned int group_id); + + +int mlx5_cmd_fs_set_fte(struct mlx5_core_dev *dev, + u16 vport, + enum fs_fte_status *fte_status, + u32 *match_val, + enum fs_ft_type type, unsigned int table_id, + unsigned int index, unsigned int group_id, + unsigned int flow_tag, + unsigned short action, int dest_size, + struct list_head *dests); /* mlx5_flow_destination */ + +int mlx5_cmd_fs_delete_fte(struct mlx5_core_dev *dev, + u16 vport, + enum fs_fte_status *fte_status, + enum fs_ft_type type, unsigned int table_id, + unsigned int index); + +int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, + enum fs_ft_type type, + unsigned int id); + +int 
mlx5_init_fs(struct mlx5_core_dev *dev); +void mlx5_cleanup_fs(struct mlx5_core_dev *dev); +#endif Index: sys/dev/mlx5/mlx5_core/mlx5_core.h =================================================================== --- sys/dev/mlx5/mlx5_core/mlx5_core.h +++ sys/dev/mlx5/mlx5_core/mlx5_core.h @@ -33,8 +33,8 @@ #include #define DRIVER_NAME "mlx5_core" -#define DRIVER_VERSION "1.23.0 (03 Mar 2015)" -#define DRIVER_RELDATE "03 Mar 2015" +#define DRIVER_VERSION "3.2-rc1" +#define DRIVER_RELDATE "May 2016" extern int mlx5_core_debug_mask; Index: sys/dev/mlx5/mlx5_core/mlx5_eswitch.c =================================================================== --- /dev/null +++ sys/dev/mlx5/mlx5_core/mlx5_eswitch.c @@ -0,0 +1,1411 @@ +/*- + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "eswitch.h" + +#define UPLINK_VPORT 0xFFFF + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + printf("mlx5_core: INFO: ""(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + printf("mlx5_core: WARN: ""(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) 
\ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +enum { + MLX5_ACTION_NONE = 0, + MLX5_ACTION_ADD = 1, + MLX5_ACTION_DEL = 2, +}; + +/* E-Switch UC L2 table hash node */ +struct esw_uc_addr { + struct l2addr_node node; + u32 table_index; + u32 vport; +}; + +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + +/* Vport UC/MC hash node */ +struct vport_addr { + struct l2addr_node node; + u8 action; + u32 vport; + struct mlx5_flow_rule *flow_rule; /* SRIOV only */ +}; + +enum { + UC_ADDR_CHANGE = BIT(0), + MC_ADDR_CHANGE = BIT(1), +}; + +/* Vport context events */ +#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ + MC_ADDR_CHANGE) + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, + u32 events_mask) +{ + int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; + int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int err; + + memset(out, 0, sizeof(out)); + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_nic_vport_context_in, in, + opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_uc_address_change, 1); + if (events_mask & MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_mc_address_change, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + goto ex; + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto ex; + return 0; +ex: + return err; +} + +/* 
E-Switch vport context HW commands. query_esw_vport_context_cmd() reads the E-Switch context of 'vport' into 'out'; a non-zero vport number also sets other_vport. */ +static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + + MLX5_SET(query_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_esw_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} +/* Read the cvlan id and pcp programmed on 'vport' into *vlan and *qos. Both are left 0 unless cvlan strip or insert is enabled in the queried context. Returns -ENOTSUPP when the device lacks the cvlan strip or insert-if-not-exist capability, otherwise the result of the query command. */ +static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 *vlan, u8 *qos) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)]; + int err; + bool cvlan_strip; + bool cvlan_insert; + + memset(out, 0, sizeof(out)); + + *vlan = 0; + *qos = 0; + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out)); + if (err) + goto out; + + cvlan_strip = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_strip); + + cvlan_insert = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_insert); + + if (cvlan_strip || cvlan_insert) { + *vlan = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_id); + *qos = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_pcp); + } + + esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n", + vport, *vlan, *qos); +out: + return err; +} +/* Fill in vport_number, other_vport (for a non-zero vport) and the opcode of a caller-prepared modify_esw_vport_context_in buffer, then execute it. */ +static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)]; + + memset(out, 0, sizeof(out)); + + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + + MLX5_SET(modify_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); + + return 
mlx5_cmd_exec_check_status(dev, in, inlen, + out, sizeof(out)); +} +/* Program (set == true) or clear (set == false) cvlan strip/insert on 'vport'. The strip and insert field_select bits are always set, so with set == false the zeroed context values are what gets written. Returns -ENOTSUPP when the device lacks the cvlan strip or insert-if-not-exist capability. */ +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 vlan, u8 qos, bool set) +{ + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", + vport, vlan, qos, set); + + if (set) { + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_strip, 1); + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, 1); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_pcp, qos); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_id, vlan); + } + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_strip, 1); + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_insert, 1); + + return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); +} + +/* HW L2 Table (MPFS) management: set_l2_table_entry_cmd() writes 'mac' and the optional vlan into table entry 'index'. */ +static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, + u8 *mac, u8 vlan_valid, u16 vlan) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)]; + u8 *in_mac_addr; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid); + MLX5_SET(set_l2_table_entry_in, in, vlan, vlan); +/* the MAC is written starting at byte 2 of the mac_address field */ + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} +/* Delete the HW L2 table entry at 'index'. */ +static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 
in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(delete_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} +/* Reserve the first clear bit of the L2 table bitmap. On success the bit is set, its number is stored in *ix and 0 is returned; when no bit below l2_table->size is clear, -ENOSPC is returned and *ix holds an out-of-range value. */ +static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size); + if (*ix >= l2_table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2_table->bitmap); + + return err; +} +/* Release a bitmap index obtained from alloc_l2_table_index(). */ +static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix) +{ + __clear_bit(ix, l2_table->bitmap); +} +/* Reserve a table index and program it with 'mac' through set_l2_table_entry_cmd(); the index is released again if the command fails. On success *index holds the reserved index. */ +static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac, + u8 vlan_valid, u16 vlan, + u32 *index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + int err; + + err = alloc_l2_table_index(l2_table, index); + if (err) + return err; + + err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan); + if (err) + free_l2_table_index(l2_table, *index); + + return err; +} +/* Issue the delete command for 'index' and release the bitmap index. The command's return value is not checked, so the index is released even if the command fails. */ +static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + + del_l2_table_entry_cmd(dev, index); + free_l2_table_index(l2_table, index); +} + +/* E-Switch FDB: esw_fdb_set_vport_rule() adds a rule to the FDB table that forwards frames whose outer destination MAC equals 'mac' to 'vport'. Returns the rule, or NULL on any failure. */ +static struct mlx5_flow_rule * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +{ + int match_header = MLX5_MATCH_OUTER_HEADERS; + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + u32 *match_v; + u32 *match_c; + u8 *dmac_v; + u8 *dmac_c; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + printf("mlx5_core: WARN: ""FDB: Failed to alloc match parameters\n"); + goto out; + } + dmac_v = 
MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers.dmac_47_16); + dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers.dmac_47_16); + + ether_addr_copy(dmac_v, mac); + /* Match criteria mask */ + memset(dmac_c, 0xff, 6); + + dest.type = MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT; + dest.vport_num = vport; + + esw_debug(esw->dev, + "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", + dmac_v, dmac_c, vport); + flow_rule = + mlx5_add_flow_rule(esw->fdb_table.fdb, + match_header, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR_OR_NULL(flow_rule)) { + printf("mlx5_core: WARN: ""FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + flow_rule = NULL; + } +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} + +static int esw_create_fdb_table(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *g; + void *match_criteria; + int table_size; + u32 *flow_group_in; + u8 *dmac; + int err = 0; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -ENOMEM; + } + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + memset(flow_group_in, 0, inlen); + + /* (-2) Since MaorG said so .. 
*/ + table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)) - 2; + + fdb = mlx5_create_flow_table(root_ns, 0, "FDB", table_size); + if (IS_ERR_OR_NULL(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create FDB Table err %d\n", err); + goto out; + } + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + eth_broadcast_addr(dmac); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR_OR_NULL(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create flow group err(%d)\n", err); + goto out; + } + + esw->fdb_table.addr_grp = g; + esw->fdb_table.fdb = fdb; +out: + kfree(flow_group_in); + if (err && !IS_ERR_OR_NULL(fdb)) + mlx5_destroy_flow_table(fdb); + return err; +} + +static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy FDB Table\n"); + mlx5_destroy_flow_group(esw->fdb_table.addr_grp); + mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw->fdb_table.fdb = NULL; + esw->fdb_table.addr_grp = NULL; +} + +/* E-Switch vport UC/MC lists management */ +typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, + struct vport_addr *vaddr); + +static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + int err; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (esw_uc) { + esw_warn(esw->dev, + "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", + mac, vport, esw_uc->vport); + return -EEXIST; + } + + 
esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL); + if (!esw_uc) + return -ENOMEM; + esw_uc->vport = vport; + + err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index); + if (err) + goto abort; + + if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + return err; +abort: + l2addr_hash_del(esw_uc); + return err; +} + +static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (!esw_uc || esw_uc->vport != vport) { + esw_debug(esw->dev, + "MAC(%pM) doesn't belong to vport (%d)\n", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + + del_l2_table_entry(esw->dev, esw_uc->table_index); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + l2addr_hash_del(esw_uc); + return 0; +} + +static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (esw_mc) + goto add; + + esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL); + if (!esw_mc) + return -ENOMEM; + + esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ + esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); +add: + esw_mc->refcnt++; + /* Forward MC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + 
esw_debug(esw->dev, + "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, + esw_mc->refcnt, esw_mc->uplink_rule); + return 0; +} + +static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to find eswitch MC addr for MAC(%pM) vport(%d)", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, + "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, esw_mc->refcnt, + esw_mc->uplink_rule); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule); + vaddr->flow_rule = NULL; + + if (--esw_mc->refcnt) + return 0; + + if (esw_mc->uplink_rule) + mlx5_del_flow_rule(esw_mc->uplink_rule); + + l2addr_hash_del(esw_mc); + return 0; +} + +/* Apply vport UC/MC list to HW l2 table and FDB table */ +static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NIC_VPORT_LIST_TYPE_UC; + vport_addr_action vport_addr_add; + vport_addr_action vport_addr_del; + struct vport_addr *addr; + struct l2addr_node *node; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + vport_addr_add = is_uc ? esw_add_uc_addr : + esw_add_mc_addr; + vport_addr_del = is_uc ? esw_del_uc_addr : + esw_del_mc_addr; + + hash = is_uc ? 
vport->uc_list : vport->mc_list; + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + switch (addr->action) { + case MLX5_ACTION_ADD: + vport_addr_add(esw, addr); + addr->action = MLX5_ACTION_NONE; + break; + case MLX5_ACTION_DEL: + vport_addr_del(esw, addr); + l2addr_hash_del(addr); + break; + } + } +} + +/* Sync vport UC/MC list from vport context */ +static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NIC_VPORT_LIST_TYPE_UC; + u8 (*mac_list)[ETH_ALEN]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int size; + int err; + int hi; + int i; + + size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : + MLX5_MAX_MC_PER_VPORT(esw->dev); + + mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!mac_list) + return; + + hash = is_uc ? vport->uc_list : vport->mc_list; + + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, + mac_list, &size); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", + vport_num, is_uc ? 
"UC" : "MC", size); + + for (i = 0; i < size; i++) { + if (is_uc && !is_valid_ether_addr(mac_list[i])) + continue; + + if (!is_uc && !is_multicast_ether_addr(mac_list[i])) + continue; + + addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); + if (addr) { + addr->action = MLX5_ACTION_NONE; + continue; + } + + addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add MAC(%pM) to vport[%d] DB\n", + mac_list[i], vport_num); + continue; + } + addr->vport = vport_num; + addr->action = MLX5_ACTION_ADD; + } + kfree(mac_list); +} + +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_core_dev *dev = vport->dev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", + vport->vport, mac); + + if (vport->enabled_events & UC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NIC_VPORT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NIC_VPORT_LIST_TYPE_UC); + } + + if (vport->enabled_events & MC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NIC_VPORT_LIST_TYPE_MC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NIC_VPORT_LIST_TYPE_MC); + } + + esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); + if (vport->enabled) + arm_vport_context_events_cmd(dev, vport->vport, + vport->enabled_events); +} + +static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *vlan_grp = NULL; + struct mlx5_flow_group *drop_grp = NULL; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + void 
*match_criteria; + char table_name[32]; + u32 *flow_group_in; + int table_size = 2; + int err = 0; + + if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + return; + + esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n", + vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch egress flow namespace\n"); + return; + } + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return; + + snprintf(table_name, 32, "egress_%d", vport->vport); + acl = mlx5_create_vport_flow_table(root_ns, vport->vport, 0, table_name, table_size); + if (IS_ERR_OR_NULL(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n", + vport->vport, err); + goto out; + } + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + vlan_grp = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR_OR_NULL(vlan_grp)) { + err = PTR_ERR(vlan_grp); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n", + vport->vport, err); + goto out; + } + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + drop_grp = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR_OR_NULL(drop_grp)) { + err = PTR_ERR(drop_grp); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n", + vport->vport, err); 
+ goto out; + } + + vport->egress.acl = acl; + vport->egress.drop_grp = drop_grp; + vport->egress.allowed_vlans_grp = vlan_grp; +out: + kfree(flow_group_in); + if (err && !IS_ERR_OR_NULL(vlan_grp)) + mlx5_destroy_flow_group(vlan_grp); + if (err && !IS_ERR_OR_NULL(acl)) + mlx5_destroy_flow_table(acl); +} + +static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) + mlx5_del_flow_rule(vport->egress.allowed_vlan); + + if (!IS_ERR_OR_NULL(vport->egress.drop_rule)) + mlx5_del_flow_rule(vport->egress.drop_rule); + + vport->egress.allowed_vlan = NULL; + vport->egress.drop_rule = NULL; +} + +static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->egress.acl)) + return; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport); + + esw_vport_cleanup_egress_rules(esw, vport); + mlx5_destroy_flow_group(vport->egress.allowed_vlans_grp); + mlx5_destroy_flow_group(vport->egress.drop_grp); + mlx5_destroy_flow_table(vport->egress.acl); + vport->egress.allowed_vlans_grp = NULL; + vport->egress.drop_grp = NULL; + vport->egress.acl = NULL; +} + +static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + struct mlx5_flow_group *g; + void *match_criteria; + char table_name[32]; + u32 *flow_group_in; + int table_size = 1; + int err = 0; + + if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) + return; + + esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n", + vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch ingress flow namespace\n"); + return; + 
} + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return; + + snprintf(table_name, 32, "ingress_%d", vport->vport); + acl = mlx5_create_vport_flow_table(root_ns, vport->vport, 0, table_name, table_size); + if (IS_ERR_OR_NULL(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n", + vport->vport, err); + goto out; + } + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR_OR_NULL(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow group, err(%d)\n", + vport->vport, err); + goto out; + } + + vport->ingress.acl = acl; + vport->ingress.drop_grp = g; +out: + kfree(flow_group_in); + if (err && !IS_ERR_OR_NULL(acl)) + mlx5_destroy_flow_table(acl); +} + +static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->ingress.drop_rule)) + mlx5_del_flow_rule(vport->ingress.drop_rule); + vport->ingress.drop_rule = NULL; +} + +static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->ingress.acl)) + return; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport); + + esw_vport_cleanup_ingress_rules(esw, vport); + mlx5_destroy_flow_group(vport->ingress.drop_grp); + mlx5_destroy_flow_table(vport->ingress.acl); + vport->ingress.acl = NULL; + vport->ingress.drop_grp = NULL; +} + +static int esw_vport_ingress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_destination 
dest; + u32 *match_v; + u32 *match_c; + int err = 0; + + if (IS_ERR_OR_NULL(vport->ingress.acl)) { + esw_warn(esw->dev, + "vport[%d] configure ingress rules failed, ingress acl is not initialized!\n", + vport->vport); + return -EPERM; + } + + esw_vport_cleanup_ingress_rules(esw, vport); + + if (!vport->vlan && !vport->qos) + return 0; + + esw_debug(esw->dev, + "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->vlan, vport->qos); + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + err = -ENOMEM; + esw_warn(esw->dev, "vport[%d] configure ingress rules failed, err(%d)\n", + vport->vport, err); + goto out; + } + MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_v, outer_headers.cvlan_tag); + + dest.type = MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT; + dest.vport_num = vport->vport; + + vport->ingress.drop_rule = + mlx5_add_flow_rule(vport->ingress.acl, + MLX5_MATCH_OUTER_HEADERS, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_DROP, + 0, &dest); + if (IS_ERR_OR_NULL(vport->ingress.drop_rule)) { + err = PTR_ERR(vport->ingress.drop_rule); + printf("mlx5_core: WARN: ""vport[%d] configure ingress rules, err(%d)\n", vport->vport, err); + vport->ingress.drop_rule = NULL; + } +out: + kfree(match_v); + kfree(match_c); + return err; +} + +static int esw_vport_egress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_destination dest; + u32 *match_v; + u32 *match_c; + int err = 0; + + if (IS_ERR_OR_NULL(vport->egress.acl)) { + esw_warn(esw->dev, "vport[%d] configure rgress rules failed, egress acl is not initialized!\n", + vport->vport); + return -EPERM; + } + + esw_vport_cleanup_egress_rules(esw, vport); + + if (!vport->vlan && !vport->qos) + return 0; + + esw_debug(esw->dev, + "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", + vport->vport, 
vport->vlan, vport->qos); + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + err = -ENOMEM; + esw_warn(esw->dev, "vport[%d] configure egress rules failed, err(%d)\n", + vport->vport, err); + goto out; + } + + /* Allowed vlan rule */ + MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_v, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.first_vid); + MLX5_SET(fte_match_param, match_v, outer_headers.first_vid, vport->vlan); + + dest.type = MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT; + dest.vport_num = vport->vport; + + vport->egress.allowed_vlan = + mlx5_add_flow_rule(vport->egress.acl, + MLX5_MATCH_OUTER_HEADERS, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_ALLOW, + 0, &dest); + if (IS_ERR_OR_NULL(vport->egress.allowed_vlan)) { + err = PTR_ERR(vport->egress.allowed_vlan); + printf("mlx5_core: WARN: ""vport[%d] configure egress allowed vlan rule failed, err(%d)\n", vport->vport, err); + vport->egress.allowed_vlan = NULL; + goto out; + } + + /* Drop others rule (star rule) */ + memset(match_c, 0, MLX5_ST_SZ_BYTES(fte_match_param)); + memset(match_v, 0, MLX5_ST_SZ_BYTES(fte_match_param)); + vport->egress.drop_rule = + mlx5_add_flow_rule(vport->egress.acl, + 0, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_DROP, + 0, &dest); + if (IS_ERR_OR_NULL(vport->egress.drop_rule)) { + err = PTR_ERR(vport->egress.drop_rule); + printf("mlx5_core: WARN: ""vport[%d] configure egress drop rule failed, err(%d)\n", vport->vport, err); + vport->egress.drop_rule = NULL; + } +out: + kfree(match_v); + kfree(match_c); + return err; +} + +static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, + int enable_events) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + mutex_lock(&vport->state_lock); + WARN_ON(vport->enabled); + + 
esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + + if (vport_num) { /* Only VFs need ACLs for VST and spoofchk filtering */ + esw_vport_enable_ingress_acl(esw, vport); + esw_vport_enable_egress_acl(esw, vport); + esw_vport_ingress_config(esw, vport); + esw_vport_egress_config(esw, vport); + } + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + + /* Sync with current vport context */ + vport->enabled_events = enable_events; + esw_vport_change_handler(&vport->vport_change_handler); + + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = true; + spin_unlock_irqrestore(&vport->lock, flags); + + arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + + esw->enabled_vports++; + esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); + mutex_unlock(&vport->state_lock); +} + +static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_node *tmp; + int hi; + + for_each_l2hash_node(node, tmp, vport->uc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NIC_VPORT_LIST_TYPE_UC); + + for_each_l2hash_node(node, tmp, vport->mc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NIC_VPORT_LIST_TYPE_MC); +} + +static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + mutex_lock(&vport->state_lock); + if (!vport->enabled) { + mutex_unlock(&vport->state_lock); + return; + } + + esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); + /* Mark this vport as disabled to discard new events */ + spin_lock_irqsave(&vport->lock, flags); + 
vport->enabled = false; + vport->enabled_events = 0; + spin_unlock_irqrestore(&vport->lock, flags); + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); + /* Wait for current already scheduled events to complete */ + flush_workqueue(esw->work_queue); + /* Disable events from this vport */ + arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + /* We don't assume VFs will cleanup after themselves */ + esw_cleanup_vport(esw, vport_num); + if (vport_num) { + esw_vport_disable_egress_acl(esw, vport); + esw_vport_disable_ingress_acl(esw, vport); + } + esw->enabled_vports--; + mutex_unlock(&vport->state_lock); +} + +/* Public E-Switch API */ +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +{ + int err; + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + return -ENOTSUPP; + } + + if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) + esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n"); + + if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) + esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); + + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); + + esw_disable_vport(esw, 0); + + err = esw_create_fdb_table(esw); + if (err) + goto abort; + + for (i = 0; i <= nvfs; i++) + esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + + esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", + esw->enabled_vports); + return 0; + +abort: + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + return err; +} + +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +{ + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, 
port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", + esw->enabled_vports); + + for (i = 0; i < esw->total_vports; i++) + esw_disable_vport(esw, i); + + esw_destroy_fdb_table(esw); + + /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); +} + +int mlx5_eswitch_init(struct mlx5_core_dev *dev) +{ + int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + int total_vports = 1; + struct mlx5_eswitch *esw; + int vport_num; + int err; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + esw_info(dev, + "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n", + total_vports, l2_table_size, + MLX5_MAX_UC_PER_VPORT(dev), + MLX5_MAX_MC_PER_VPORT(dev)); + + esw = kzalloc(sizeof(*esw), GFP_KERNEL); + if (!esw) + return -ENOMEM; + + esw->dev = dev; + + esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size), + sizeof(uintptr_t), GFP_KERNEL); + if (!esw->l2_table.bitmap) { + err = -ENOMEM; + goto abort; + } + esw->l2_table.size = l2_table_size; + + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); + if (!esw->work_queue) { + err = -ENOMEM; + goto abort; + } + + esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport), + GFP_KERNEL); + if (!esw->vports) { + err = -ENOMEM; + goto abort; + } + + for (vport_num = 0; vport_num < total_vports; vport_num++) { + struct mlx5_vport *vport = &esw->vports[vport_num]; + + vport->vport = vport_num; + vport->dev = dev; + INIT_WORK(&vport->vport_change_handler, + esw_vport_change_handler); + spin_lock_init(&vport->lock); + mutex_init(&vport->state_lock); + } + + esw->total_vports = total_vports; + esw->enabled_vports = 0; + + dev->priv.eswitch = esw; + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + /* VF Vports will be enabled when SRIOV is enabled */ + return 0; +abort: + if (esw->work_queue) + 
destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); + return err; +} + +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "cleanup\n"); + esw_disable_vport(esw, 0); + + esw->dev->priv.eswitch = NULL; + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); +} + +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; + u16 vport_num = be16_to_cpu(vc_eqe->vport_num); + struct mlx5_vport *vport; + + if (!esw) { + printf("mlx5_core: WARN: ""MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", vport_num); + return; + } + + vport = &esw->vports[vport_num]; + spin_lock(&vport->lock); + if (vport->enabled) + queue_work(esw->work_queue, &vport->vport_change_handler); + spin_unlock(&vport->lock); +} + +/* Vport Administration */ +#define ESW_ALLOWED(esw) \ + (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) +#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) + +static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]) +{ + int err = 0; + u64 node_guid; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to mlx5_modify_nic_vport_mac 
vport(%d) err=(%d)\n", + vport, err); + return err; + } + + node_guid_gen_from_mac(&node_guid, mac); + err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to mlx5_modify_nic_vport_node_guid vport(%d) err=(%d)\n", + vport, err); + return err; + } + + return err; +} + +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state) +{ + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + return mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport, link_state); +} + +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct mlx5_esw_vport_info *ivi) +{ + u16 vlan; + u8 qos; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vport - 1; + + mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac); + ivi->linkstate = mlx5_query_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport); + query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); + ivi->vlan = vlan; + ivi->qos = qos; + ivi->spoofchk = 0; + + return 0; +} + +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + struct mlx5_vport *evport; + int err = 0; + int set = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) + return -EINVAL; + + if (vlan || qos) + set = 1; + + evport = &esw->vports[vport]; + + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); + if (err) + return err; + + mutex_lock(&evport->state_lock); + evport->vlan = vlan; + evport->qos = qos; + if (evport->enabled) { + esw_vport_ingress_config(esw, evport); + esw_vport_egress_config(esw, evport); + } + mutex_unlock(&evport->state_lock); + return err; +} + Index: sys/dev/mlx5/mlx5_core/mlx5_fs_cmd.c 
=================================================================== --- /dev/null +++ sys/dev/mlx5/mlx5_core/mlx5_fs_cmd.c @@ -0,0 +1,301 @@ +/*- + * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * $FreeBSD$
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "fs_core.h"
+#include "mlx5_core.h"
+
+/*
+ * Build and execute a SET_FLOW_TABLE_ROOT command for table `id` of
+ * type `type`.
+ *
+ * Returns -EINVAL when dev is NULL, otherwise the result of
+ * mlx5_cmd_exec_check_status().
+ */
+int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
+			    enum fs_ft_type type,
+			    unsigned int id)
+{
+	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)];
+	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)];
+
+	if (!dev)
+		return -EINVAL;
+	memset(in, 0, sizeof(in));
+
+	MLX5_SET(set_flow_table_root_in, in, opcode,
+		 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+	MLX5_SET(set_flow_table_root_in, in, table_type, type);
+	MLX5_SET(set_flow_table_root_in, in, table_id, id);
+
+	memset(out, 0, sizeof(out));
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+					  sizeof(out));
+}
+
+/*
+ * Build and execute a CREATE_FLOW_TABLE command.
+ *
+ * A non-zero vport is written to vport_number together with
+ * other_vport = 1; vport 0 leaves both fields zero.  On success the
+ * table id read from the command output is stored in *table_id; on
+ * failure *table_id is left untouched.
+ *
+ * Returns -EINVAL when dev is NULL, the command status on failure,
+ * 0 on success.
+ */
+int mlx5_cmd_fs_create_ft(struct mlx5_core_dev *dev,
+			  u16 vport,
+			  enum fs_ft_type type, unsigned int level,
+			  unsigned int log_size, unsigned int *table_id)
+{
+	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)];
+	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)];
+	int err;
+
+	if (!dev)
+		return -EINVAL;
+	memset(in, 0, sizeof(in));
+
+	MLX5_SET(create_flow_table_in, in, opcode,
+		 MLX5_CMD_OP_CREATE_FLOW_TABLE);
+
+	MLX5_SET(create_flow_table_in, in, table_type, type);
+	MLX5_SET(create_flow_table_in, in, level, level);
+	MLX5_SET(create_flow_table_in, in, log_size, log_size);
+	if (vport) {
+		MLX5_SET(create_flow_table_in, in, vport_number, vport);
+		MLX5_SET(create_flow_table_in, in, other_vport, 1);
+	}
+
+	memset(out, 0, sizeof(out));
+	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+					 sizeof(out));
+	if (err)
+		return err;
+
+	*table_id = MLX5_GET(create_flow_table_out, out, table_id);
+
+	return 0;
+}
+
+/*
+ * Build and execute a DESTROY_FLOW_TABLE command for table_id.
+ * A non-zero vport selects that vport via vport_number/other_vport.
+ * Returns -EINVAL when dev is NULL, else the command status.
+ */
+int mlx5_cmd_fs_destroy_ft(struct mlx5_core_dev *dev,
+			   u16 vport,
+			   enum fs_ft_type type, unsigned int table_id)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)];
+	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)];
+
+	if (!dev)
+		return -EINVAL;
+	memset(in, 0, sizeof(in));
+	memset(out, 0, sizeof(out));
+
+	MLX5_SET(destroy_flow_table_in, in, opcode,
+
MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+	MLX5_SET(destroy_flow_table_in, in, table_type, type);
+	MLX5_SET(destroy_flow_table_in, in, table_id, table_id);
+	if (vport) {
+		MLX5_SET(destroy_flow_table_in, in, vport_number, vport);
+		MLX5_SET(destroy_flow_table_in, in, other_vport, 1);
+	}
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/*
+ * Execute a CREATE_FLOW_GROUP command using the caller-supplied inbox.
+ *
+ * `in` is modified in place: opcode, table_type, table_id and (for a
+ * non-zero vport) vport_number/other_vport are written into it.  The
+ * command is sent with a length of MLX5_ST_SZ_BYTES(create_flow_group_in),
+ * so `in` must be at least that large.  On success the new group id is
+ * stored in *group_id.
+ *
+ * Returns -EINVAL when dev is NULL, else the command status.
+ */
+int mlx5_cmd_fs_create_fg(struct mlx5_core_dev *dev,
+			  u32 *in,
+			  u16 vport,
+			  enum fs_ft_type type, unsigned int table_id,
+			  unsigned int *group_id)
+{
+	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)];
+	int err;
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	if (!dev)
+		return -EINVAL;
+	memset(out, 0, sizeof(out));
+
+	MLX5_SET(create_flow_group_in, in, opcode,
+		 MLX5_CMD_OP_CREATE_FLOW_GROUP);
+	MLX5_SET(create_flow_group_in, in, table_type, type);
+	MLX5_SET(create_flow_group_in, in, table_id, table_id);
+	if (vport) {
+		MLX5_SET(create_flow_group_in, in, vport_number, vport);
+		MLX5_SET(create_flow_group_in, in, other_vport, 1);
+	}
+
+	err = mlx5_cmd_exec_check_status(dev, in,
+					 inlen, out,
+					 sizeof(out));
+	if (!err)
+		*group_id = MLX5_GET(create_flow_group_out, out, group_id);
+
+	return err;
+}
+
+/*
+ * Build and execute a DESTROY_FLOW_GROUP command for group_id in
+ * table_id.  A non-zero vport selects that vport.
+ * Returns -EINVAL when dev is NULL, else the command status.
+ */
+int mlx5_cmd_fs_destroy_fg(struct mlx5_core_dev *dev,
+			   u16 vport,
+			   enum fs_ft_type type, unsigned int table_id,
+			   unsigned int group_id)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)];
+	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)];
+
+	if (!dev)
+		return -EINVAL;
+	memset(in, 0, sizeof(in));
+	memset(out, 0, sizeof(out));
+
+	MLX5_SET(destroy_flow_group_in, in, opcode,
+		 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+	MLX5_SET(destroy_flow_group_in, in, table_type, type);
+	MLX5_SET(destroy_flow_group_in, in, table_id, table_id);
+	MLX5_SET(destroy_flow_group_in, in, group_id, group_id);
+	if (vport) {
+		MLX5_SET(destroy_flow_group_in, in, vport_number, vport);
+		MLX5_SET(destroy_flow_group_in, in, other_vport, 1);
+	}
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/*
+ * Build and execute a SET_FLOW_TABLE_ENTRY command.
+ *
+ * When *fte_status already has FS_FTE_STATUS_EXISTING set, the command
+ * is issued as a modification of the destination list (op_mod 1); that
+ * requires the flow_modify_en capability, otherwise -ENOTSUPP is
+ * returned.  On success FS_FTE_STATUS_EXISTING is set in *fte_status.
+ *
+ * match_val must hold MLX5_ST_SZ_BYTES(fte_match_param) bytes.
+ * dests is walked as a list of struct mlx5_flow_rule linked through
+ * base.list; dest_size is the number of entries the inbox is sized for.
+ *
+ * Returns -EINVAL when dev is NULL, -ENOMEM when the inbox cannot be
+ * allocated, otherwise the command status.
+ */
+int mlx5_cmd_fs_set_fte(struct mlx5_core_dev *dev,
+			u16 vport,
+			enum fs_fte_status *fte_status,
+			u32 *match_val,
+			enum fs_ft_type type, unsigned int table_id,
+			unsigned int index, unsigned int group_id,
+			unsigned int flow_tag,
+			unsigned short action, int dest_size,
+			struct list_head *dests)  /* list of struct mlx5_flow_rule */
+{
+	u32 out[MLX5_ST_SZ_DW(set_fte_out)];
+	u32 *in;
+	unsigned int inlen;
+	struct mlx5_flow_rule *dst;
+	void *in_flow_context;
+	void *in_match_value;
+	void *in_dests;
+	int err;
+	int opmod = 0;
+	int modify_mask = 0;
+	int atomic_mod_cap;
+
+	/*
+	 * Destinations are only sent for a pure forward action.
+	 * NOTE(review): this is an equality test, so an action value that
+	 * combines FWD_DEST with other bits would drop its destinations -
+	 * confirm whether callers ever pass such a combination.
+	 */
+	if (action != MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)
+		dest_size = 0;
+
+	/* Fixed header plus one dest_format_struct per destination. */
+	inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
+		dest_size * MLX5_ST_SZ_BYTES(dest_format_struct);
+
+	if (!dev)
+		return -EINVAL;
+
+	if (*fte_status & FS_FTE_STATUS_EXISTING) {
+		atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev,
+						    flow_table_properties_nic_receive.
+						    flow_modify_en);
+		if (!atomic_mod_cap)
+			return -ENOTSUPP;
+		opmod = 1;
+		modify_mask = 1 <<
+			MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST;
+	}
+
+	in = mlx5_vzalloc(inlen);
+	if (!in) {
+		mlx5_core_warn(dev, "failed to allocate inbox\n");
+		return -ENOMEM;
+	}
+
+	MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
+	MLX5_SET(set_fte_in, in, op_mod, opmod);
+	MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask);
+	MLX5_SET(set_fte_in, in, table_type, type);
+	MLX5_SET(set_fte_in, in, table_id,   table_id);
+	MLX5_SET(set_fte_in, in, flow_index, index);
+	if (vport) {
+		MLX5_SET(set_fte_in, in, vport_number, vport);
+		MLX5_SET(set_fte_in, in, other_vport, 1);
+	}
+
+	in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
+	MLX5_SET(flow_context, in_flow_context, group_id, group_id);
+	MLX5_SET(flow_context, in_flow_context, flow_tag, flow_tag);
+	MLX5_SET(flow_context, in_flow_context, action, action);
+	MLX5_SET(flow_context, in_flow_context, destination_list_size,
+		 dest_size);
+	in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
+
match_value);
+	memcpy(in_match_value, match_val, MLX5_ST_SZ_BYTES(fte_match_param));
+	if (dest_size) {
+		in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+		/*
+		 * One dest_format_struct is written per list entry.
+		 * NOTE(review): the loop is bounded by the list, not by
+		 * dest_size, while the inbox was sized from dest_size -
+		 * callers must keep the two in sync.
+		 */
+		list_for_each_entry(dst, dests, base.list) {
+			unsigned int id;
+
+			MLX5_SET(dest_format_struct, in_dests, destination_type,
+				 dst->dest_attr.type);
+			/* Flow-table destinations carry the table id, all others the TIR number. */
+			if (dst->dest_attr.type ==
+				MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE)
+				id = dst->dest_attr.ft->id;
+			else
+				id = dst->dest_attr.tir_num;
+			MLX5_SET(dest_format_struct, in_dests, destination_id, id);
+			in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+		}
+	}
+	memset(out, 0, sizeof(out));
+	err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
+					 sizeof(out));
+	if (!err)
+		*fte_status |= FS_FTE_STATUS_EXISTING;
+
+	kvfree(in);
+
+	return err;
+}
+
+/*
+ * Build and execute a DELETE_FLOW_TABLE_ENTRY command for `index` in
+ * table_id.
+ *
+ * Does nothing and returns 0 when *fte_status does not have
+ * FS_FTE_STATUS_EXISTING set.  On success *fte_status is cleared to 0.
+ * Returns -EINVAL when dev is NULL, else the command status.
+ */
+int mlx5_cmd_fs_delete_fte(struct mlx5_core_dev *dev,
+			   u16 vport,
+			   enum fs_fte_status *fte_status,
+			   enum fs_ft_type type, unsigned int table_id,
+			   unsigned int index)
+{
+	u32 in[MLX5_ST_SZ_DW(delete_fte_in)];
+	u32 out[MLX5_ST_SZ_DW(delete_fte_out)];
+	int err;
+
+	if (!(*fte_status & FS_FTE_STATUS_EXISTING))
+		return 0;
+
+	if (!dev)
+		return -EINVAL;
+	memset(in, 0, sizeof(in));
+	memset(out, 0, sizeof(out));
+
+	MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+	MLX5_SET(delete_fte_in, in, table_type, type);
+	MLX5_SET(delete_fte_in, in, table_id, table_id);
+	MLX5_SET(delete_fte_in, in, flow_index, index);
+	if (vport) {
+		MLX5_SET(delete_fte_in, in, vport_number,  vport);
+		MLX5_SET(delete_fte_in, in, other_vport, 1);
+	}
+
+	err =  mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*fte_status = 0;
+
+	return err;
+}
Index: sys/dev/mlx5/mlx5_core/mlx5_fs_tree.c
===================================================================
--- /dev/null
+++ sys/dev/mlx5/mlx5_core/mlx5_fs_tree.c
@@ -0,0 +1,2721 @@
+/*-
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include "mlx5_core.h" +#include "fs_core.h" +#include +#include + +#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ + sizeof(struct init_tree_node)) + +#define ADD_PRIO(name_val, flags_val, min_level_val, max_ft_val, caps_val, \ + ...) 
{.type = FS_TYPE_PRIO,\ + .name = name_val,\ + .min_ft_level = min_level_val,\ + .flags = flags_val,\ + .max_ft = max_ft_val,\ + .caps = caps_val,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define ADD_FT_PRIO(name_val, flags_val, max_ft_val, ...)\ + ADD_PRIO(name_val, flags_val, 0, max_ft_val, {},\ + __VA_ARGS__)\ + +#define ADD_NS(name_val, ...) {.type = FS_TYPE_NAMESPACE,\ + .name = name_val,\ + .children = (struct init_tree_node[]) {__VA_ARGS__},\ + .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ +} + +#define INIT_CAPS_ARRAY_SIZE(...) (sizeof((long[]){__VA_ARGS__}) /\ + sizeof(long)) + +#define FS_CAP(cap) (__mlx5_bit_off(flow_table_nic_cap, cap)) + +#define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ + .caps = (long[]) {__VA_ARGS__}} + +#define BYPASS_MAX_FT 5 +#define BYPASS_PRIO_MAX_FT 1 +#define KERNEL_MAX_FT 3 +#define LEFTOVER_MAX_FT 1 +#define KENREL_MIN_LEVEL 3 +#define LEFTOVER_MIN_LEVEL KENREL_MIN_LEVEL + 1 +#define BYPASS_MIN_LEVEL MLX5_NUM_BYPASS_FTS + LEFTOVER_MIN_LEVEL +struct node_caps { + size_t arr_sz; + long *caps; +}; + +struct init_tree_node { + enum fs_type type; + const char *name; + struct init_tree_node *children; + int ar_size; + struct node_caps caps; + u8 flags; + int min_ft_level; + int prio; + int max_ft; +} root_fs = { + .type = FS_TYPE_NAMESPACE, + .name = "root", + .ar_size = 3, + .children = (struct init_tree_node[]) { + ADD_PRIO("by_pass_prio", 0, BYPASS_MIN_LEVEL, 0, + FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), + FS_CAP(flow_table_properties_nic_receive.modify_root)), + ADD_NS("by_pass_ns", + ADD_FT_PRIO("prio0", 0, + BYPASS_PRIO_MAX_FT), + ADD_FT_PRIO("prio1", 0, + BYPASS_PRIO_MAX_FT), + ADD_FT_PRIO("prio2", 0, + BYPASS_PRIO_MAX_FT), + ADD_FT_PRIO("prio3", 0, + BYPASS_PRIO_MAX_FT), + ADD_FT_PRIO("prio4", 0, + BYPASS_PRIO_MAX_FT), + ADD_FT_PRIO("prio5", 0, + BYPASS_PRIO_MAX_FT), + 
ADD_FT_PRIO("prio6", 0,
+					    BYPASS_PRIO_MAX_FT),
+				ADD_FT_PRIO("prio7", 0,
+					    BYPASS_PRIO_MAX_FT),
+				ADD_FT_PRIO("prio-mcast", 0,
+					    BYPASS_PRIO_MAX_FT))),
+		ADD_PRIO("kernel_prio", 0, KENREL_MIN_LEVEL, 0, {},
+			 ADD_NS("kernel_ns",
+				ADD_FT_PRIO("prio_kernel-0", 0,
+					    KERNEL_MAX_FT))),
+		ADD_PRIO("leftovers_prio", MLX5_CORE_FS_PRIO_SHARED,
+			 LEFTOVER_MIN_LEVEL, 0,
+			 FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en),
+					  FS_CAP(flow_table_properties_nic_receive.modify_root)),
+			 ADD_NS("leftover_ns",
+				ADD_FT_PRIO("leftovers_prio-0",
+					    MLX5_CORE_FS_PRIO_SHARED,
+					    LEFTOVER_MAX_FT)))
+	}
+};
+
+/* Tree creation functions */
+
+/*
+ * Walk the parent links from node to the top of its tree and return the
+ * enclosing root namespace.  Returns NULL (after logging) when the top
+ * node is not a namespace, i.e. node is not attached to a tree.
+ */
+static struct mlx5_flow_root_namespace *find_root(struct fs_base *node)
+{
+	struct fs_base *parent;
+
+	/* Make sure we only read it once while we go up the tree */
+	while ((parent = node->parent))
+		node = parent;
+
+	if (node->type != FS_TYPE_NAMESPACE) {
+		printf("mlx5_core: WARN: ""mlx5: flow steering node %s is not in tree or garbaged\n", node->name);
+		return NULL;
+	}
+
+	/* fs_base -> mlx5_flow_namespace -> mlx5_flow_root_namespace */
+	return container_of(container_of(node,
+					 struct mlx5_flow_namespace,
+					 base),
+			    struct mlx5_flow_root_namespace,
+			    ns);
+}
+
+/* Return the device owning node's tree, or NULL when find_root() fails. */
+static inline struct mlx5_core_dev *fs_get_dev(struct fs_base *node)
+{
+	struct mlx5_flow_root_namespace *root = find_root(node);
+
+	if (root)
+		return root->dev;
+	return NULL;
+}
+
+/*
+ * Initialize the bookkeeping of a node: kref, users_refcount (set to
+ * `refcount`), completion, list head and lock.  The parent and name are
+ * not touched here.
+ */
+static void fs_init_node(struct fs_base *node,
+			 unsigned int refcount)
+{
+	kref_init(&node->refcount);
+	atomic_set(&node->users_refcount, refcount);
+	init_completion(&node->complete);
+	INIT_LIST_HEAD(&node->list);
+	mutex_init(&node->lock);
+}
+
+/*
+ * Attach node under parent: takes a users_refcount reference on the
+ * parent (when there is one) and stores a copy of name.
+ * NOTE(review): the kstrdup_const() result is not checked, so
+ * node->name may be NULL after an allocation failure.
+ */
+static void _fs_add_node(struct fs_base *node,
+			 const char *name,
+			 struct fs_base *parent)
+{
+	if (parent)
+		atomic_inc(&parent->users_refcount);
+	node->name = kstrdup_const(name, GFP_KERNEL);
+	node->parent = parent;
+}
+
+/* fs_init_node() followed by _fs_add_node(). */
+static void fs_add_node(struct fs_base *node,
+			struct fs_base *parent, const char *name,
+			unsigned int refcount)
+{
+	fs_init_node(node, refcount);
+	_fs_add_node(node,
name, parent);
+}
+
+static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
+		    bool parent_locked);
+
+static void fs_del_dst(struct mlx5_flow_rule *dst);
+static void _fs_del_ft(struct mlx5_flow_table *ft);
+static void fs_del_fg(struct mlx5_flow_group *fg);
+static void fs_del_fte(struct fs_fte *fte);
+
+/*
+ * Dispatch to the type-specific delete routine of a node.  Node types
+ * other than destination, table, group and entry are ignored.
+ */
+static void cmd_remove_node(struct fs_base *base)
+{
+	switch (base->type) {
+	case FS_TYPE_FLOW_DEST:
+		fs_del_dst(container_of(base, struct mlx5_flow_rule, base));
+		break;
+	case FS_TYPE_FLOW_TABLE:
+		_fs_del_ft(container_of(base, struct mlx5_flow_table, base));
+		break;
+	case FS_TYPE_FLOW_GROUP:
+		fs_del_fg(container_of(base, struct mlx5_flow_group, base));
+		break;
+	case FS_TYPE_FLOW_ENTRY:
+		fs_del_fte(container_of(base, struct fs_fte, base));
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * kref release callback that does not free the node.
+ *
+ * Runs the type-specific delete with the parent lock (if any) and the
+ * node lock held, signals node->complete, and then drops the reference
+ * the node held on its parent.  The parent is put with
+ * _fs_remove_node as callback, so a parent released this way is freed.
+ */
+static void __fs_remove_node(struct kref *kref)
+{
+	struct fs_base *node = container_of(kref, struct fs_base, refcount);
+
+	if (node->parent)
+		mutex_lock(&node->parent->lock);
+	mutex_lock(&node->lock);
+	cmd_remove_node(node);
+	mutex_unlock(&node->lock);
+	complete(&node->complete);
+	if (node->parent) {
+		mutex_unlock(&node->parent->lock);
+		_fs_put(node->parent, _fs_remove_node, false);
+	}
+}
+
+/* kref release callback: __fs_remove_node() plus freeing the name and the node. */
+void _fs_remove_node(struct kref *kref)
+{
+	struct fs_base *node = container_of(kref, struct fs_base, refcount);
+
+	__fs_remove_node(kref);
+	kfree_const(node->name);
+	kfree(node);
+}
+
+/* Take one users_refcount reference on node. */
+static void fs_get(struct fs_base *node)
+{
+	atomic_inc(&node->users_refcount);
+}
+
+/*
+ * Drop one users_refcount reference on node.
+ *
+ * parent_locked says whether the caller already holds the parent's
+ * lock; if not, it is taken here for the duration of the decrement.
+ * When the count reaches zero the node is unlinked from its list, the
+ * parent lock is released, and kref_put() is called with kref_cb.  If
+ * the caller held the parent lock, it is re-taken before returning.
+ */
+static void _fs_put(struct fs_base *node, void (*kref_cb)(struct kref *kref),
+		    bool parent_locked)
+{
+	struct fs_base *parent_node = node->parent;
+
+	if (parent_node && !parent_locked)
+		mutex_lock(&parent_node->lock);
+	if (atomic_dec_and_test(&node->users_refcount)) {
+		if (parent_node) {
+			/*remove from parent's list*/
+			list_del_init(&node->list);
+			mutex_unlock(&parent_node->lock);
+		}
+		kref_put(&node->refcount, kref_cb);
+		if (parent_node && parent_locked)
+
mutex_lock(&parent_node->lock);
+	} else if (parent_node && !parent_locked) {
+		mutex_unlock(&parent_node->lock);
+	}
+}
+
+/* Drop a reference; the parent lock is not held by the caller. */
+static void fs_put(struct fs_base *node)
+{
+	_fs_put(node, __fs_remove_node, false);
+}
+
+/* Drop a reference; the caller already holds the parent's lock. */
+static void fs_put_parent_locked(struct fs_base *node)
+{
+	_fs_put(node, __fs_remove_node, true);
+}
+
+/*
+ * Drop the caller's reference, wait until the node's removal has been
+ * signalled through node->complete, then free the name and the node.
+ */
+static void fs_remove_node(struct fs_base *node)
+{
+	fs_put(node);
+	wait_for_completion(&node->complete);
+	kfree_const(node->name);
+	kfree(node);
+}
+
+/* Same as fs_remove_node() for callers that hold the parent's lock. */
+static void fs_remove_node_parent_locked(struct fs_base *node)
+{
+	fs_put_parent_locked(node);
+	wait_for_completion(&node->complete);
+	kfree_const(node->name);
+	kfree(node);
+}
+
+/*
+ * Allocate a flow table entry with the given action, flow tag and
+ * index.  sizeof(fte->val) bytes are copied from match_value.  The
+ * entry starts with an empty destination list and is not linked into
+ * any group.  Returns ERR_PTR(-ENOMEM) on allocation failure.
+ */
+static struct fs_fte *fs_alloc_fte(u8 action,
+				   u32 flow_tag,
+				   u32 *match_value,
+				   unsigned int index)
+{
+	struct fs_fte *fte;
+
+
+	fte = kzalloc(sizeof(*fte), GFP_KERNEL);
+	if (!fte)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(fte->val, match_value, sizeof(fte->val));
+	fte->base.type = FS_TYPE_FLOW_ENTRY;
+	fte->dests_size = 0;
+	fte->flow_tag = flow_tag;
+	fte->index = index;
+	INIT_LIST_HEAD(&fte->dests);
+	fte->action = action;
+
+	return fte;
+}
+
+/*
+ * Allocate the star entry of a flow table: a forward-to-destination
+ * entry at `index` in group fg with a single flow-table destination
+ * whose target is not set here.  The entry is appended to fg->ftes and
+ * fg->num_ftes is incremented.
+ *
+ * Returns ERR_PTR(-ENOSPC) when the group is full, ERR_PTR(-ENOMEM) on
+ * allocation failure.  The ft argument is not used in the body.
+ */
+static struct fs_fte *alloc_star_ft_entry(struct mlx5_flow_table *ft,
+					  struct mlx5_flow_group *fg,
+					  u32 *match_value,
+					  unsigned int index)
+{
+	int err;
+	struct fs_fte *fte;
+	struct mlx5_flow_rule *dst;
+
+	if (fg->num_ftes == fg->max_ftes)
+		return ERR_PTR(-ENOSPC);
+
+	fte = fs_alloc_fte(MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+			   MLX5_FS_DEFAULT_FLOW_TAG, match_value, index);
+	if (IS_ERR(fte))
+		return fte;
+
+	/*create dst*/
+	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+	if (!dst) {
+		err = -ENOMEM;
+		goto free_fte;
+	}
+
+	fte->base.parent = &fg->base;
+	fte->dests_size = 1;
+	dst->dest_attr.type = MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE;
+	dst->base.parent = &fte->base;
+	list_add(&dst->base.list, &fte->dests);
+	/* assumed that the callee creates the star rules sorted by index */
+	list_add_tail(&fte->base.list, &fg->ftes);
+	fg->num_ftes++;
+
+	return fte;
+
+free_fte:
+	kfree(fte);
+	return ERR_PTR(err);
+}
+
+/*
+ * Free a star entry made by alloc_star_ft_entry(): release all of its
+ * destinations, unlink it from its parent group and free it.
+ */
+/* assume that fte can't be changed */
+static void free_star_fte_entry(struct fs_fte *fte)
+{
+	struct mlx5_flow_group	*fg;
+	struct mlx5_flow_rule	*dst, *temp;
+
+	fs_get_parent(fg, fte);
+
+	list_for_each_entry_safe(dst, temp, &fte->dests, base.list) {
+		fte->dests_size--;
+		list_del(&dst->base.list);
+		kfree(dst);
+	}
+
+	list_del(&fte->base.list);
+	fg->num_ftes--;
+	kfree(fte);
+}
+
+/*
+ * Allocate a flow group object from a create_flow_group_in inbox.
+ * The match criteria and match_criteria_enable are copied into
+ * fg->mask; start_index and max_ftes come from start_flow_index and
+ * end_flow_index (inclusive range).  No firmware command is issued.
+ * Returns ERR_PTR(-ENOMEM) on allocation failure.
+ */
+static struct mlx5_flow_group *fs_alloc_fg(u32 *create_fg_in)
+{
+	struct mlx5_flow_group *fg;
+	void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+					    create_fg_in, match_criteria);
+	u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
+					    create_fg_in,
+					    match_criteria_enable);
+	fg = kzalloc(sizeof(*fg), GFP_KERNEL);
+	if (!fg)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&fg->ftes);
+	fg->mask.match_criteria_enable = match_criteria_enable;
+	memcpy(&fg->mask.match_criteria, match_criteria,
+	       sizeof(fg->mask.match_criteria));
+	fg->base.type = FS_TYPE_FLOW_GROUP;
+	fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
+				   start_flow_index);
+	fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in,
+				end_flow_index) - fg->start_index + 1;
+	return fg;
+}
+
+static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio);
+static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr,
+					    struct fs_prio *prio);
+
+/*
+ * Point the star rule of src_ft at dst_ft.
+ *
+ * With a non-NULL dst_ft the star entry is written to the device with
+ * an all-zero match value and a reference is taken on dst_ft.  With a
+ * NULL dst_ft the star entry is deleted from the device instead.
+ * Returns -ENOMEM when the match buffer cannot be allocated, or the
+ * error of the set command.
+ */
+/* assumed src_ft and dst_ft can't be freed */
+static int fs_set_star_rule(struct mlx5_core_dev *dev,
+			    struct mlx5_flow_table *src_ft,
+			    struct mlx5_flow_table *dst_ft)
+{
+	struct mlx5_flow_rule *src_dst;
+	struct fs_fte *src_fte;
+	int err = 0;
+	u32 *match_value;
+	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
+
+	src_dst = list_first_entry(&src_ft->star_rule.fte->dests,
+				   struct mlx5_flow_rule, base.list);
+	match_value = mlx5_vzalloc(match_len);
+	if (!match_value) {
+		mlx5_core_warn(dev, "failed to allocate inbox\n");
+		return -ENOMEM;
+	}
+	/*Create match context*/
+
+
fs_get_parent(src_fte, src_dst);
+
+	src_dst->dest_attr.ft = dst_ft;
+	if (dst_ft) {
+		err = mlx5_cmd_fs_set_fte(dev,
+					  src_ft->vport,
+					  &src_fte->status,
+					  match_value, src_ft->type,
+					  src_ft->id, src_fte->index,
+					  src_ft->star_rule.fg->id,
+					  src_fte->flow_tag,
+					  src_fte->action,
+					  src_fte->dests_size,
+					  &src_fte->dests);
+		if (err)
+			goto free;
+
+		fs_get(&dst_ft->base);
+	} else {
+		mlx5_cmd_fs_delete_fte(dev,
+				       src_ft->vport,
+				       &src_fte->status,
+				       src_ft->type, src_ft->id,
+				       src_fte->index);
+	}
+
+free:
+	kvfree(match_value);
+	return err;
+}
+
+/*
+ * Re-point the star rule of every flow table in prev_prio at next_ft.
+ *
+ * Tables whose star rule already targets next_ft are skipped.  After a
+ * successful re-point, the reference held on the previous target is
+ * dropped; locked_prio names the priority whose lock the caller already
+ * holds, so a previous target living in it is put with the
+ * parent-locked variant.
+ *
+ * Returns -ENODEV when the device cannot be resolved, the error of the
+ * first failing fs_set_star_rule() (tables handled before the failure
+ * stay re-pointed), or 0 on success.
+ */
+static int connect_prev_fts(struct fs_prio *locked_prio,
+			    struct fs_prio *prev_prio,
+			    struct mlx5_flow_table *next_ft)
+{
+	struct mlx5_flow_table *iter;
+	int err = 0;
+	struct mlx5_core_dev *dev = fs_get_dev(&prev_prio->base);
+
+	if (!dev)
+		return -ENODEV;
+
+	mutex_lock(&prev_prio->base.lock);
+	fs_for_each_ft(iter, prev_prio) {
+		struct mlx5_flow_rule *src_dst =
+			list_first_entry(&iter->star_rule.fte->dests,
+					 struct mlx5_flow_rule, base.list);
+		struct mlx5_flow_table *prev_ft = src_dst->dest_attr.ft;
+
+		if (prev_ft == next_ft)
+			continue;
+
+		err = fs_set_star_rule(dev, iter, next_ft);
+		if (err) {
+			mlx5_core_warn(dev,
+				       "mlx5: flow steering can't connect prev and next\n");
+			goto unlock;
+		} else {
+			/* Assume ft's prio is locked */
+			if (prev_ft) {
+				struct fs_prio *prio;
+
+				fs_get_parent(prio, prev_ft);
+				if (prio == locked_prio)
+					fs_put_parent_locked(&prev_ft->base);
+				else
+					fs_put(&prev_ft->base);
+			}
+		}
+	}
+
+unlock:
+	mutex_unlock(&prev_prio->base.lock);
+	/* Report the failure; both callers check this value. */
+	return err;
+}
+
+/*
+ * Create the star rule (catch-all group and entry at index
+ * ft->max_fte) of a new flow table and chain it between the previous
+ * and the next flow table of the tree.
+ */
+static int create_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
+{
+	struct mlx5_flow_group *fg;
+	int err;
+	u32 *fg_in;
+	u32 *match_value;
+	struct mlx5_flow_table *next_ft;
+	struct mlx5_flow_table *prev_ft;
+	struct mlx5_flow_root_namespace *root = find_root(&prio->base);
+	int fg_inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
+
+	fg_in =
mlx5_vzalloc(fg_inlen);
+	if (!fg_in) {
+		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
+		return -ENOMEM;
+	}
+
+	match_value = mlx5_vzalloc(match_len);
+	if (!match_value) {
+		mlx5_core_warn(root->dev, "failed to allocate inbox\n");
+		kvfree(fg_in);
+		return -ENOMEM;
+	}
+
+	/* The star group covers the single index ft->max_fte. */
+	MLX5_SET(create_flow_group_in, fg_in, start_flow_index, ft->max_fte);
+	MLX5_SET(create_flow_group_in, fg_in, end_flow_index, ft->max_fte);
+	fg = fs_alloc_fg(fg_in);
+	if (IS_ERR(fg)) {
+		err = PTR_ERR(fg);
+		goto out;
+	}
+	ft->star_rule.fg = fg;
+	err =  mlx5_cmd_fs_create_fg(fs_get_dev(&prio->base),
+				     fg_in, ft->vport, ft->type,
+				     ft->id,
+				     &fg->id);
+	if (err)
+		goto free_fg;
+
+	ft->star_rule.fte = alloc_star_ft_entry(ft, fg,
+						      match_value,
+						      ft->max_fte);
+	if (IS_ERR(ft->star_rule.fte)) {
+		/*
+		 * Propagate the real error and skip free_star_rule: there
+		 * is no entry to free, only an ERR_PTR.
+		 */
+		err = PTR_ERR(ft->star_rule.fte);
+		goto destroy_fg;
+	}
+
+	mutex_lock(&root->fs_chain_lock);
+	next_ft = find_next_ft(prio);
+	err = fs_set_star_rule(root->dev, ft, next_ft);
+	if (err) {
+		mutex_unlock(&root->fs_chain_lock);
+		goto free_star_rule;
+	}
+	if (next_ft) {
+		struct fs_prio *parent;
+
+		fs_get_parent(parent, next_ft);
+		fs_put(&next_ft->base);
+	}
+	prev_ft = find_prev_ft(ft, prio);
+	if (prev_ft) {
+		struct fs_prio *prev_parent;
+
+		fs_get_parent(prev_parent, prev_ft);
+
+		err = connect_prev_fts(NULL, prev_parent, ft);
+		/*
+		 * Drop the prev_ft reference on both outcomes, as the
+		 * success path and destroy_star_rule() do.
+		 */
+		fs_put(&prev_ft->base);
+		if (err) {
+			mutex_unlock(&root->fs_chain_lock);
+			goto destroy_chained_star_rule;
+		}
+	}
+	mutex_unlock(&root->fs_chain_lock);
+	kvfree(fg_in);
+	kvfree(match_value);
+
+	return 0;
+
+destroy_chained_star_rule:
+	fs_set_star_rule(fs_get_dev(&prio->base), ft, NULL);
+	if (next_ft)
+		fs_put(&next_ft->base);
+free_star_rule:
+	free_star_fte_entry(ft->star_rule.fte);
+destroy_fg:
+	/*
+	 * Use root->dev: ft is not linked into the tree yet, so
+	 * fs_get_dev(&ft->base) cannot resolve the device here.
+	 */
+	mlx5_cmd_fs_destroy_fg(root->dev, ft->vport,
+			       ft->type, ft->id,
+			       fg->id);
+free_fg:
+	kfree(fg);
+out:
+	kvfree(fg_in);
+	kvfree(match_value);
+	return err;
+}
+
+/*
+ * Unchain and destroy the star rule of ft: tables of the previous
+ * priority are re-pointed at the next flow table, the star entry is
+ * removed from the device, and the star group and entry are freed.
+ */
+static void destroy_star_rule(struct mlx5_flow_table *ft, struct fs_prio *prio)
+{
+	int err;
+	struct mlx5_flow_root_namespace
*root;
+	struct mlx5_core_dev *dev = fs_get_dev(&prio->base);
+	struct mlx5_flow_table *prev_ft, *next_ft;
+	struct fs_prio *prev_prio;
+
+	WARN_ON(!dev);
+
+	root = find_root(&prio->base);
+	if (!root) {
+		printf("mlx5_core: ERR: ""mlx5: flow steering failed to find root of priority %s", prio->base.name);
+		/* Nothing below can work without the root (its lock and device). */
+		return;
+	}
+
+	/* In order to ensure atomic deletion, first update
+	 * prev ft to point on the next ft.
+	 */
+	mutex_lock(&root->fs_chain_lock);
+	prev_ft = find_prev_ft(ft, prio);
+	next_ft = find_next_ft(prio);
+	if (prev_ft) {
+		fs_get_parent(prev_prio, prev_ft);
+		/*Prev is connected to ft, only if ft is the first(last) in the prio*/
+		err = connect_prev_fts(prio, prev_prio, next_ft);
+		if (err)
+			mlx5_core_warn(root->dev,
+				       "flow steering can't connect prev and next of flow table\n");
+		fs_put(&prev_ft->base);
+	}
+
+	err = fs_set_star_rule(root->dev, ft, NULL);
+	/*One put is for fs_get in find next ft*/
+	if (next_ft) {
+		fs_put(&next_ft->base);
+		/* The second put releases the reference ft's star rule held on next_ft. */
+		if (!err)
+			fs_put(&next_ft->base);
+	}
+
+	mutex_unlock(&root->fs_chain_lock);
+	err = mlx5_cmd_fs_destroy_fg(dev, ft->vport, ft->type, ft->id,
+				     ft->star_rule.fg->id);
+	if (err)
+		mlx5_core_warn(dev,
+			       "flow steering can't destroy star entry group(index:%d) of ft:%s\n", ft->star_rule.fg->start_index,
+			       ft->base.name);
+	free_star_fte_entry(ft->star_rule.fte);
+
+	kfree(ft->star_rule.fg);
+	ft->star_rule.fg = NULL;
+}
+
+/* Return the priority of ns whose number equals prio, or NULL when none matches. */
+static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
+				 unsigned int prio)
+{
+	struct fs_prio *iter_prio;
+
+	fs_for_each_prio(iter_prio, ns) {
+		if (iter_prio->prio == prio)
+			return iter_prio;
+	}
+
+	return NULL;
+}
+
+static unsigned int _alloc_new_level(struct fs_prio *prio,
+				     struct mlx5_flow_namespace *match);
+
+/*
+ * Namespace half of the level computation: sum max_ft of the
+ * priorities of ns that precede prio, then continue upwards through
+ * the priority that contains ns.  Returns 0 for a NULL ns.
+ */
+static unsigned int __alloc_new_level(struct mlx5_flow_namespace *ns,
+				      struct fs_prio *prio)
+{
+	unsigned int level = 0;
+	struct fs_prio *p;
+
+	if (!ns)
+		return 0;
+
+	mutex_lock(&ns->base.lock);
+	fs_for_each_prio(p, ns) {
+		if (p != prio)
+			level += p->max_ft;
+		else
+
break;
+	}
+	mutex_unlock(&ns->base.lock);
+
+	/* From here on prio is the priority that contains ns (may be NULL at the top). */
+	fs_get_parent(prio, ns);
+	if (prio)
+		WARN_ON(prio->base.type != FS_TYPE_PRIO);
+
+	return level + _alloc_new_level(prio, ns);
+}
+
+/*
+ * Priority half of the level computation.
+ *
+ * Walks the children of prio in reverse.  If a flow table is met, the
+ * result is the level accumulated so far plus that table's level + 1.
+ * Namespaces met before reaching `match` contribute the sum of their
+ * priorities' max_ft.  If no flow table is found, the computation
+ * continues in the namespace that contains prio.  Returns 0 for a NULL
+ * prio.
+ */
+/* Called under lock of priority, hence locking all upper objects */
+static unsigned int _alloc_new_level(struct fs_prio *prio,
+				     struct mlx5_flow_namespace *match)
+{
+	struct mlx5_flow_namespace *ns;
+	struct fs_base *it;
+	unsigned int level = 0;
+
+	if (!prio)
+		return 0;
+
+	mutex_lock(&prio->base.lock);
+	fs_for_each_ns_or_ft_reverse(it, prio) {
+		if (it->type == FS_TYPE_NAMESPACE) {
+			struct fs_prio *p;
+
+			fs_get_obj(ns, it);
+
+			if (match != ns) {
+				mutex_lock(&ns->base.lock);
+				fs_for_each_prio(p, ns)
+					level += p->max_ft;
+				mutex_unlock(&ns->base.lock);
+			} else {
+				break;
+			}
+		} else {
+			struct mlx5_flow_table *ft;
+
+			fs_get_obj(ft, it);
+			mutex_unlock(&prio->base.lock);
+			return level + ft->level + 1;
+		}
+	}
+
+	fs_get_parent(ns, prio);
+	mutex_unlock(&prio->base.lock);
+	return __alloc_new_level(ns, prio) + level;
+}
+
+/* Compute the level for a new flow table created in prio. */
+static unsigned int alloc_new_level(struct fs_prio *prio)
+{
+	return _alloc_new_level(prio, NULL);
+}
+
+/*
+ * Make ft the root flow table if its level is lower than that of the
+ * current root (or if there is no root yet).  Returns 0 when nothing
+ * had to change, else the status of the update command; root->root_ft
+ * is only updated on success.
+ */
+static int update_root_ft_create(struct mlx5_flow_root_namespace *root,
+				    struct mlx5_flow_table *ft)
+{
+	int err = 0;
+	int min_level = INT_MAX;
+
+	if (root->root_ft)
+		min_level = root->root_ft->level;
+
+	if (ft->level < min_level)
+		err = mlx5_cmd_update_root_ft(root->dev, ft->type,
+					      ft->id);
+	else
+		return err;
+
+	if (err)
+		mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
+			       ft->id);
+	else
+		root->root_ft = ft;
+
+	return err;
+}
+
+/*
+ * Create a flow table in fs_prio: allocate the object, create the
+ * table and its star rule on the device, update the root table when
+ * applicable, and link the table into the tree.
+ */
+static struct mlx5_flow_table *_create_ft_common(struct mlx5_flow_namespace *ns,
+						 u16 vport,
+						 struct fs_prio *fs_prio,
+						 int max_fte,
+						 const char *name)
+{
+	struct mlx5_flow_table *ft;
+	int err;
+	int log_table_sz;
+	int ft_size;
+	char gen_name[20];
+	struct mlx5_flow_root_namespace *root =
+		find_root(&ns->base);
+
+	if (!root) {
+		printf("mlx5_core: ERR: ""mlx5: flow steering failed
to find root of namespace %s", ns->base.name);
+		return ERR_PTR(-ENODEV);
+	}
+
+	/* The priority already holds its maximum number of tables. */
+	if (fs_prio->num_ft == fs_prio->max_ft)
+		return ERR_PTR(-ENOSPC);
+
+	ft  = kzalloc(sizeof(*ft), GFP_KERNEL);
+	if (!ft)
+		return ERR_PTR(-ENOMEM);
+
+	fs_init_node(&ft->base, 1);
+	INIT_LIST_HEAD(&ft->fgs);
+
+	/* Temporarily WA until we expose the level set in the API */
+	if (root->table_type == FS_FT_ESW_EGRESS_ACL ||
+		root->table_type == FS_FT_ESW_INGRESS_ACL)
+		ft->level = 0;
+	else
+		ft->level = alloc_new_level(fs_prio);
+
+	ft->base.type = FS_TYPE_FLOW_TABLE;
+	ft->vport = vport;
+	ft->type = root->table_type;
+	/*Two entries are reserved for star rules*/
+	ft_size = roundup_pow_of_two(max_fte + 2);
+	/*User isn't aware to those rules*/
+	ft->max_fte = ft_size - 2;
+	log_table_sz = ilog2(ft_size);
+	err = mlx5_cmd_fs_create_ft(root->dev, ft->vport, ft->type,
+				    ft->level, log_table_sz, &ft->id);
+	if (err)
+		goto free_ft;
+
+	/* ft has no parent yet at this point; it is linked in further below. */
+	err = create_star_rule(ft, fs_prio);
+	if (err)
+		goto del_ft;
+
+	if ((root->table_type == FS_FT_NIC_RX) && MLX5_CAP_FLOWTABLE(root->dev,
+			       flow_table_properties_nic_receive.modify_root)) {
+		err = update_root_ft_create(root, ft);
+		if (err)
+			goto destroy_star_rule;
+	}
+
+	/* Without a caller-supplied name, generate one from the table id. */
+	if (!name || !strlen(name)) {
+		snprintf(gen_name, 20, "flow_table_%u", ft->id);
+		_fs_add_node(&ft->base, gen_name, &fs_prio->base);
+	} else {
+		_fs_add_node(&ft->base, name, &fs_prio->base);
+	}
+	list_add_tail(&ft->base.list, &fs_prio->objs);
+	fs_prio->num_ft++;
+
+	return ft;
+
+destroy_star_rule:
+	destroy_star_rule(ft, fs_prio);
+del_ft:
+	mlx5_cmd_fs_destroy_ft(root->dev, ft->vport, ft->type, ft->id);
+free_ft:
+	kfree(ft);
+	return ERR_PTR(err);
+}
+
+/*
+ * Look up priority number `prio` in ns and create a flow table in it.
+ * Returns ERR_PTR(-EINVAL) when ns has no such priority.
+ */
+static struct mlx5_flow_table *create_ft_common(struct mlx5_flow_namespace *ns,
+						u16 vport,
+						unsigned int prio,
+						int max_fte,
+						const char *name)
+{
+	struct fs_prio *fs_prio = NULL;
+	fs_prio = find_prio(ns, prio);
+	if (!fs_prio)
+		return ERR_PTR(-EINVAL);
+
+	return _create_ft_common(ns, vport, fs_prio, max_fte, name);
+}
+
+
+static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns,
+						   struct list_head *start);
+
+static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio,
+						     struct list_head *start);
+
+/*
+ * Return the first existing flow table of a shared priority with its
+ * shared_refcount incremented, or NULL when the priority has none.
+ */
+static struct mlx5_flow_table *mlx5_create_autogrouped_shared_flow_table(struct fs_prio *fs_prio)
+{
+	struct mlx5_flow_table *ft;
+
+	ft = find_first_ft_in_prio(fs_prio, &fs_prio->objs);
+	if (ft) {
+		ft->shared_refcount++;
+		return ft;
+	}
+
+	return NULL;
+}
+
+/*
+ * Create a flow table with automatic grouping enabled
+ * (autogroup.max_types = max_num_groups).
+ *
+ * For a priority flagged MLX5_CORE_FS_PRIO_SHARED an existing table is
+ * reused when there is one (see above); the lookup and creation are
+ * serialized by the priority's shared_lock.
+ *
+ * Returns ERR_PTR(-EINVAL) when ns has no priority `prio`, or the
+ * error of create_ft_common().
+ */
+struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
+							   int prio,
+							   const char *name,
+							   int num_flow_table_entries,
+							   int max_num_groups)
+{
+	struct mlx5_flow_table *ft = NULL;
+	struct fs_prio *fs_prio;
+	bool is_shared_prio;
+
+	fs_prio = find_prio(ns, prio);
+	if (!fs_prio)
+		return ERR_PTR(-EINVAL);
+
+	is_shared_prio = fs_prio->flags & MLX5_CORE_FS_PRIO_SHARED;
+	if (is_shared_prio) {
+		mutex_lock(&fs_prio->shared_lock);
+		ft = mlx5_create_autogrouped_shared_flow_table(fs_prio);
+	}
+
+	/* A shared table was found and reused. */
+	if (ft)
+		goto return_ft;
+
+	ft = create_ft_common(ns, 0, prio, num_flow_table_entries,
+			      name);
+	if (IS_ERR(ft))
+		goto return_ft;
+
+	ft->autogroup.active = true;
+	ft->autogroup.max_types = max_num_groups;
+	if (is_shared_prio)
+		ft->shared_refcount = 1;
+
+return_ft:
+	if (is_shared_prio)
+		mutex_unlock(&fs_prio->shared_lock);
+	return ft;
+}
+EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
+
+/* Create a flow table for the given vport in priority `prio` of ns. */
+struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
+						     u16 vport,
+						     int prio,
+						     const char *name,
+						     int num_flow_table_entries)
+{
+	return create_ft_common(ns, vport, prio, num_flow_table_entries, name);
+}
+EXPORT_SYMBOL(mlx5_create_vport_flow_table);
+
+/* Create a flow table (vport 0) in priority `prio` of ns. */
+struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
+					       int prio,
+					       const char *name,
+					       int num_flow_table_entries)
+{
+	return create_ft_common(ns, 0, prio, num_flow_table_entries, name);
+}
+EXPORT_SYMBOL(mlx5_create_flow_table);
+
+static void
_fs_del_ft(struct mlx5_flow_table *ft)
+{
+	int err;
+	struct mlx5_core_dev *dev = fs_get_dev(&ft->base);
+	struct fs_prio *prio;
+
+	/* A failing destroy command is only logged; the count is fixed up regardless. */
+	err = mlx5_cmd_fs_destroy_ft(dev, ft->vport, ft->type, ft->id);
+	if (err)
+		mlx5_core_warn(dev, "flow steering can't destroy ft %s\n",
+			       ft->base.name);
+
+	fs_get_parent(prio, ft);
+	prio->num_ft--;
+}
+
+/*
+ * If ft is the current root flow table, hand the root role over to the
+ * next flow table: the following table in the same priority, or else
+ * the one returned by find_next_ft().  With no successor the root is
+ * simply cleared.
+ *
+ * Returns 0 when ft is not the root or the hand-over succeeded, else
+ * the status of the update command (root->root_ft is then unchanged).
+ */
+static int update_root_ft_destroy(struct mlx5_flow_root_namespace *root,
+				    struct mlx5_flow_table *ft)
+{
+	int err = 0;
+	struct fs_prio *prio;
+	struct mlx5_flow_table *next_ft = NULL;
+	struct mlx5_flow_table *put_ft = NULL;
+
+	if (root->root_ft != ft)
+		return 0;
+
+	fs_get_parent(prio, ft);
+	/* Assuming objs contains only flow tables and
+	 * flow tables are sorted by level.
+	 */
+	if (!list_is_last(&ft->base.list, &prio->objs)) {
+		next_ft = list_next_entry(ft, base.list);
+	} else {
+		next_ft = find_next_ft(prio);
+		/* Only this path's result is put again at the end of the function. */
+		put_ft = next_ft;
+	}
+
+	if (next_ft) {
+		err = mlx5_cmd_update_root_ft(root->dev, next_ft->type,
+					      next_ft->id);
+		if (err)
+			/* Report the table the command was issued for. */
+			mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
+				       next_ft->id);
+	}
+	if (!err)
+		root->root_ft = next_ft;
+
+	if (put_ft)
+		fs_put(&put_ft->base);
+
+	return err;
+}
+
+/*
+ * Destroy a flow table.  For a table in a shared priority that still
+ * has other users, only the caller's share is dropped and 0 is
+ * returned.  Returns -ENODEV when the root namespace cannot be found.
+ */
+/* Objects in the same prio are destroyed in the reverse order they were created */
+int mlx5_destroy_flow_table(struct mlx5_flow_table *ft)
+{
+	int err = 0;
+	struct fs_prio *prio;
+	struct mlx5_flow_root_namespace *root;
+	bool is_shared_prio;
+
+	fs_get_parent(prio, ft);
+	root = find_root(&prio->base);
+
+	if (!root) {
+		printf("mlx5_core: ERR: ""mlx5: flow steering failed to find root of priority %s", prio->base.name);
+		return -ENODEV;
+	}
+
+	is_shared_prio = prio->flags & MLX5_CORE_FS_PRIO_SHARED;
+	if (is_shared_prio) {
+		mutex_lock(&prio->shared_lock);
+		if (ft->shared_refcount > 1) {
+			--ft->shared_refcount;
+			fs_put(&ft->base);
+			mutex_unlock(&prio->shared_lock);
+			return 0;
+		}
+	}
+
+	mutex_lock(&prio->base.lock);
+	mutex_lock(&ft->base.lock);
+
+	err = update_root_ft_destroy(root, ft);
+
if (err) + goto unlock_ft; + + /* delete two last entries */ + destroy_star_rule(ft, prio); + + mutex_unlock(&ft->base.lock); + fs_remove_node_parent_locked(&ft->base); + mutex_unlock(&prio->base.lock); + if (is_shared_prio) + mutex_unlock(&prio->shared_lock); + + return err; + +unlock_ft: + mutex_unlock(&ft->base.lock); + mutex_unlock(&prio->base.lock); + if (is_shared_prio) + mutex_unlock(&prio->shared_lock); + + return err; +} +EXPORT_SYMBOL(mlx5_destroy_flow_table); + +static struct mlx5_flow_group *fs_create_fg(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct list_head *prev, + u32 *fg_in, + int refcount) +{ + struct mlx5_flow_group *fg; + int err; + unsigned int end_index; + char name[20]; + + fg = fs_alloc_fg(fg_in); + if (IS_ERR(fg)) + return fg; + + end_index = fg->start_index + fg->max_ftes - 1; + err = mlx5_cmd_fs_create_fg(dev, fg_in, + ft->vport, ft->type, ft->id, + &fg->id); + if (err) + goto free_fg; + + mutex_lock(&ft->base.lock); + if (ft->autogroup.active) + ft->autogroup.num_types++; + + snprintf(name, sizeof(name), "group_%u", fg->id); + /*Add node to tree*/ + fs_add_node(&fg->base, &ft->base, name, refcount); + /*Add node to group list*/ + list_add(&fg->base.list, prev); + mutex_unlock(&ft->base.lock); + + return fg; + +free_fg: + kfree(fg); + return ERR_PTR(err); +} + +struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, + u32 *in) +{ + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev = fs_get_dev(&ft->base); + + if (!dev) + return ERR_PTR(-ENODEV); + + if (ft->autogroup.active) + return ERR_PTR(-EPERM); + + fg = fs_create_fg(dev, ft, ft->fgs.prev, in, 1); + + return fg; +} +EXPORT_SYMBOL(mlx5_create_flow_group); + +/*Group is destoyed when all the rules in the group were removed*/ +static void fs_del_fg(struct mlx5_flow_group *fg) +{ + struct mlx5_flow_table *parent_ft; + struct mlx5_core_dev *dev; + + fs_get_parent(parent_ft, fg); + dev = fs_get_dev(&parent_ft->base); + WARN_ON(!dev); + + if 
(parent_ft->autogroup.active) + parent_ft->autogroup.num_types--; + + if (mlx5_cmd_fs_destroy_fg(dev, parent_ft->vport, + parent_ft->type, + parent_ft->id, fg->id)) + mlx5_core_warn(dev, "flow steering can't destroy fg\n"); +} + +void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) +{ + fs_remove_node(&fg->base); +} +EXPORT_SYMBOL(mlx5_destroy_flow_group); + +static bool _fs_match_exact_val(void *mask, void *val1, void *val2, size_t size) +{ + unsigned int i; + + /* TODO: optimize by comparing 64bits when possible */ + for (i = 0; i < size; i++, mask++, val1++, val2++) + if ((*((u8 *)val1) & (*(u8 *)mask)) != + ((*(u8 *)val2) & (*(u8 *)mask))) + return false; + + return true; +} + +bool fs_match_exact_val(struct mlx5_core_fs_mask *mask, + void *val1, void *val2) +{ + if (mask->match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { + void *fte_match1 = MLX5_ADDR_OF(fte_match_param, + val1, outer_headers); + void *fte_match2 = MLX5_ADDR_OF(fte_match_param, + val2, outer_headers); + void *fte_mask = MLX5_ADDR_OF(fte_match_param, + mask->match_criteria, outer_headers); + + if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + return false; + } + + if (mask->match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { + void *fte_match1 = MLX5_ADDR_OF(fte_match_param, + val1, misc_parameters); + void *fte_match2 = MLX5_ADDR_OF(fte_match_param, + val2, misc_parameters); + void *fte_mask = MLX5_ADDR_OF(fte_match_param, + mask->match_criteria, misc_parameters); + + if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2, + MLX5_ST_SZ_BYTES(fte_match_set_misc))) + return false; + } + if (mask->match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { + void *fte_match1 = MLX5_ADDR_OF(fte_match_param, + val1, inner_headers); + void *fte_match2 = MLX5_ADDR_OF(fte_match_param, + val2, inner_headers); + void 
*fte_mask = MLX5_ADDR_OF(fte_match_param, + mask->match_criteria, inner_headers); + + if (!_fs_match_exact_val(fte_mask, fte_match1, fte_match2, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + return false; + } + return true; +} + +bool fs_match_exact_mask(u8 match_criteria_enable1, + u8 match_criteria_enable2, + void *mask1, void *mask2) +{ + return match_criteria_enable1 == match_criteria_enable2 && + !memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param)); +} + +static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns, + struct list_head *start); + +static struct mlx5_flow_table *_find_first_ft_in_prio_reverse(struct fs_prio *prio, + struct list_head *start) +{ + struct fs_base *it = container_of(start, struct fs_base, list); + + if (!prio) + return NULL; + + fs_for_each_ns_or_ft_continue_reverse(it, prio) { + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + + if (it->type == FS_TYPE_FLOW_TABLE) { + fs_get_obj(ft, it); + fs_get(&ft->base); + return ft; + } + + fs_get_obj(ns, it); + WARN_ON(ns->base.type != FS_TYPE_NAMESPACE); + + ft = find_first_ft_in_ns_reverse(ns, &ns->prios); + if (ft) + return ft; + } + + return NULL; +} + +static struct mlx5_flow_table *find_first_ft_in_prio_reverse(struct fs_prio *prio, + struct list_head *start) +{ + struct mlx5_flow_table *ft; + + if (!prio) + return NULL; + + mutex_lock(&prio->base.lock); + ft = _find_first_ft_in_prio_reverse(prio, start); + mutex_unlock(&prio->base.lock); + + return ft; +} + +static struct mlx5_flow_table *find_first_ft_in_ns_reverse(struct mlx5_flow_namespace *ns, + struct list_head *start) +{ + struct fs_prio *prio; + + if (!ns) + return NULL; + + fs_get_obj(prio, container_of(start, struct fs_base, list)); + mutex_lock(&ns->base.lock); + fs_for_each_prio_continue_reverse(prio, ns) { + struct mlx5_flow_table *ft; + + ft = find_first_ft_in_prio_reverse(prio, &prio->objs); + if (ft) { + mutex_unlock(&ns->base.lock); + return ft; + } + } + 
mutex_unlock(&ns->base.lock); + + return NULL; +} + +/* Returned a held ft, assumed curr is protected, assumed curr's parent is + * locked + */ +static struct mlx5_flow_table *find_prev_ft(struct mlx5_flow_table *curr, + struct fs_prio *prio) +{ + struct mlx5_flow_table *ft = NULL; + struct fs_base *curr_base; + + if (!curr) + return NULL; + + /* prio has either namespace or flow-tables, but not both */ + if (!list_empty(&prio->objs) && + list_first_entry(&prio->objs, struct mlx5_flow_table, base.list) != + curr) + return NULL; + + while (!ft && prio) { + struct mlx5_flow_namespace *ns; + + fs_get_parent(ns, prio); + ft = find_first_ft_in_ns_reverse(ns, &prio->base.list); + curr_base = &ns->base; + fs_get_parent(prio, ns); + + if (prio && !ft) + ft = find_first_ft_in_prio_reverse(prio, + &curr_base->list); + } + return ft; +} + +static struct mlx5_flow_table *_find_first_ft_in_prio(struct fs_prio *prio, + struct list_head *start) +{ + struct fs_base *it = container_of(start, struct fs_base, list); + + if (!prio) + return NULL; + + fs_for_each_ns_or_ft_continue(it, prio) { + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + + if (it->type == FS_TYPE_FLOW_TABLE) { + fs_get_obj(ft, it); + fs_get(&ft->base); + return ft; + } + + fs_get_obj(ns, it); + WARN_ON(ns->base.type != FS_TYPE_NAMESPACE); + + ft = find_first_ft_in_ns(ns, &ns->prios); + if (ft) + return ft; + } + + return NULL; +} + +static struct mlx5_flow_table *find_first_ft_in_prio(struct fs_prio *prio, + struct list_head *start) +{ + struct mlx5_flow_table *ft; + + if (!prio) + return NULL; + + mutex_lock(&prio->base.lock); + ft = _find_first_ft_in_prio(prio, start); + mutex_unlock(&prio->base.lock); + + return ft; +} + +static struct mlx5_flow_table *find_first_ft_in_ns(struct mlx5_flow_namespace *ns, + struct list_head *start) +{ + struct fs_prio *prio; + + if (!ns) + return NULL; + + fs_get_obj(prio, container_of(start, struct fs_base, list)); + mutex_lock(&ns->base.lock); + 
fs_for_each_prio_continue(prio, ns) { + struct mlx5_flow_table *ft; + + ft = find_first_ft_in_prio(prio, &prio->objs); + if (ft) { + mutex_unlock(&ns->base.lock); + return ft; + } + } + mutex_unlock(&ns->base.lock); + + return NULL; +} + +/* returned a held ft, assumed curr is protected, assumed curr's parent is + * locked + */ +static struct mlx5_flow_table *find_next_ft(struct fs_prio *prio) +{ + struct mlx5_flow_table *ft = NULL; + struct fs_base *curr_base; + + while (!ft && prio) { + struct mlx5_flow_namespace *ns; + + fs_get_parent(ns, prio); + ft = find_first_ft_in_ns(ns, &prio->base.list); + curr_base = &ns->base; + fs_get_parent(prio, ns); + + if (!ft && prio) + ft = _find_first_ft_in_prio(prio, &curr_base->list); + } + return ft; +} + + +/* called under ft mutex lock */ +static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria) +{ + unsigned int group_size; + unsigned int candidate_index = 0; + unsigned int candidate_group_num = 0; + struct mlx5_flow_group *g; + struct mlx5_flow_group *ret; + struct list_head *prev = &ft->fgs; + struct mlx5_core_dev *dev; + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *match_criteria_addr; + + if (!ft->autogroup.active) + return ERR_PTR(-ENOENT); + + dev = fs_get_dev(&ft->base); + if (!dev) + return ERR_PTR(-ENODEV); + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(dev, "failed to allocate inbox\n"); + return ERR_PTR(-ENOMEM); + } + + + if (ft->autogroup.num_types < ft->autogroup.max_types) + group_size = ft->max_fte / (ft->autogroup.max_types + 1); + else + group_size = 1; + + if (group_size == 0) { + mlx5_core_warn(dev, + "flow steering can't create group size of 0\n"); + ret = ERR_PTR(-EINVAL); + goto out; + } + + /* sorted by start_index */ + fs_for_each_fg(g, ft) { + candidate_group_num++; + if (candidate_index + group_size > g->start_index) + candidate_index = g->start_index + g->max_ftes; + else + break; + prev 
= &g->base.list; + } + + if (candidate_index + group_size > ft->max_fte) { + ret = ERR_PTR(-ENOSPC); + goto out; + } + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index + + group_size - 1); + match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, + in, match_criteria); + memcpy(match_criteria_addr, match_criteria, + MLX5_ST_SZ_BYTES(fte_match_param)); + + ret = fs_create_fg(dev, ft, prev, in, 0); +out: + kvfree(in); + return ret; +} + +static struct mlx5_flow_namespace *get_ns_with_notifiers(struct fs_base *node) +{ + struct mlx5_flow_namespace *ns = NULL; + + while (node && (node->type != FS_TYPE_NAMESPACE || + list_empty(&container_of(node, struct + mlx5_flow_namespace, + base)->list_notifiers))) + node = node->parent; + + if (node) + fs_get_obj(ns, node); + + return ns; +} + + +/*Assumption- fte is locked*/ +static void call_to_add_rule_notifiers(struct mlx5_flow_rule *dst, + struct fs_fte *fte) +{ + struct mlx5_flow_namespace *ns; + struct mlx5_flow_handler *iter_handler; + struct fs_client_priv_data *iter_client; + void *data; + bool is_new_rule = list_first_entry(&fte->dests, + struct mlx5_flow_rule, + base.list) == dst; + int err; + + ns = get_ns_with_notifiers(&fte->base); + if (!ns) + return; + + down_read(&ns->notifiers_rw_sem); + list_for_each_entry(iter_handler, &ns->list_notifiers, + list) { + if (iter_handler->add_dst_cb) { + data = NULL; + mutex_lock(&dst->clients_lock); + list_for_each_entry( + iter_client, &dst->clients_data, list) { + if (iter_client->fs_handler == iter_handler) { + data = iter_client->client_dst_data; + break; + } + } + mutex_unlock(&dst->clients_lock); + err = iter_handler->add_dst_cb(dst, + is_new_rule, + NULL, + iter_handler->client_context); + if (err) + break; + } + } + up_read(&ns->notifiers_rw_sem); +} + +static void 
call_to_del_rule_notifiers(struct mlx5_flow_rule *dst, + struct fs_fte *fte) +{ + struct mlx5_flow_namespace *ns; + struct mlx5_flow_handler *iter_handler; + struct fs_client_priv_data *iter_client; + void *data; + bool ctx_changed = (fte->dests_size == 0); + + ns = get_ns_with_notifiers(&fte->base); + if (!ns) + return; + down_read(&ns->notifiers_rw_sem); + list_for_each_entry(iter_handler, &ns->list_notifiers, + list) { + data = NULL; + mutex_lock(&dst->clients_lock); + list_for_each_entry(iter_client, &dst->clients_data, list) { + if (iter_client->fs_handler == iter_handler) { + data = iter_client->client_dst_data; + break; + } + } + mutex_unlock(&dst->clients_lock); + if (iter_handler->del_dst_cb) { + iter_handler->del_dst_cb(dst, ctx_changed, data, + iter_handler->client_context); + } + } + up_read(&ns->notifiers_rw_sem); +} + +/* fte should not be deleted while calling this function */ +static struct mlx5_flow_rule *_fs_add_dst_fte(struct fs_fte *fte, + struct mlx5_flow_group *fg, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_table *ft; + struct mlx5_flow_rule *dst; + int err; + + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return ERR_PTR(-ENOMEM); + + memcpy(&dst->dest_attr, dest, sizeof(*dest)); + dst->base.type = FS_TYPE_FLOW_DEST; + INIT_LIST_HEAD(&dst->clients_data); + mutex_init(&dst->clients_lock); + fs_get_parent(ft, fg); + /*Add dest to dests list- added as first element after the head*/ + list_add_tail(&dst->base.list, &fte->dests); + fte->dests_size++; + err = mlx5_cmd_fs_set_fte(fs_get_dev(&ft->base), + ft->vport, + &fte->status, + fte->val, ft->type, + ft->id, fte->index, fg->id, fte->flow_tag, + fte->action, fte->dests_size, &fte->dests); + if (err) + goto free_dst; + + list_del(&dst->base.list); + + return dst; + +free_dst: + list_del(&dst->base.list); + kfree(dst); + fte->dests_size--; + return ERR_PTR(err); +} + +static char *get_dest_name(struct mlx5_flow_destination *dest) +{ + char *name = kzalloc(sizeof(char) * 
20, GFP_KERNEL); + + switch (dest->type) { + case MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE: + snprintf(name, 20, "dest_%s_%u", "flow_table", + dest->ft->id); + return name; + case MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT: + snprintf(name, 20, "dest_%s_%u", "vport", + dest->vport_num); + return name; + case MLX5_FLOW_CONTEXT_DEST_TYPE_TIR: + snprintf(name, 20, "dest_%s_%u", "tir", dest->tir_num); + return name; + } + + return NULL; +} + +/* assumed fg is locked */ +static unsigned int fs_get_free_fg_index(struct mlx5_flow_group *fg, + struct list_head **prev) +{ + struct fs_fte *fte; + unsigned int start = fg->start_index; + + if (prev) + *prev = &fg->ftes; + + /* assumed list is sorted by index */ + fs_for_each_fte(fte, fg) { + if (fte->index != start) + return start; + start++; + if (prev) + *prev = &fte->base.list; + } + + return start; +} + + +static struct fs_fte *fs_create_fte(struct mlx5_flow_group *fg, + u32 *match_value, + u8 action, + u32 flow_tag, + struct list_head **prev) +{ + struct fs_fte *fte; + int index = 0; + + index = fs_get_free_fg_index(fg, prev); + fte = fs_alloc_fte(action, flow_tag, match_value, index); + if (IS_ERR(fte)) + return fte; + + return fte; +} + +static void add_rule_to_tree(struct mlx5_flow_rule *rule, + struct fs_fte *fte) +{ + char *dest_name; + + dest_name = get_dest_name(&rule->dest_attr); + fs_add_node(&rule->base, &fte->base, dest_name, 1); + /* re-add to list, since fs_add_node reset our list */ + list_add_tail(&rule->base.list, &fte->dests); + kfree(dest_name); + call_to_add_rule_notifiers(rule, fte); +} + +static void fs_del_dst(struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct fs_fte *fte; + u32 *match_value; + struct mlx5_core_dev *dev = fs_get_dev(&dst->base); + int match_len = MLX5_ST_SZ_BYTES(fte_match_param); + int err; + + WARN_ON(!dev); + + match_value = mlx5_vzalloc(match_len); + if (!match_value) { + mlx5_core_warn(dev, "failed to allocate inbox\n"); + return; + } + + 
fs_get_parent(fte, dst); + fs_get_parent(fg, fte); + mutex_lock(&fg->base.lock); + memcpy(match_value, fte->val, sizeof(fte->val)); + /* ft can't be changed as fg is locked */ + fs_get_parent(ft, fg); + list_del(&dst->base.list); + fte->dests_size--; + if (fte->dests_size) { + err = mlx5_cmd_fs_set_fte(dev, ft->vport, + &fte->status, match_value, ft->type, + ft->id, fte->index, fg->id, + fte->flow_tag, fte->action, + fte->dests_size, &fte->dests); + if (err) { + mlx5_core_warn(dev, "%s can't delete dst %s\n", + __func__, dst->base.name); + goto err; + } + } + call_to_del_rule_notifiers(dst, fte); +err: + mutex_unlock(&fg->base.lock); + kvfree(match_value); +} + +static void fs_del_fte(struct fs_fte *fte) +{ + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + int err; + struct mlx5_core_dev *dev; + + fs_get_parent(fg, fte); + fs_get_parent(ft, fg); + + dev = fs_get_dev(&ft->base); + WARN_ON(!dev); + + err = mlx5_cmd_fs_delete_fte(dev, ft->vport, &fte->status, + ft->type, ft->id, fte->index); + if (err) + mlx5_core_warn(dev, "flow steering can't delete fte %s\n", + fte->base.name); + + fg->num_ftes--; +} + +/* assuming parent fg is locked */ +/* Add dst algorithm */ +static struct mlx5_flow_rule *fs_add_dst_fg(struct mlx5_flow_group *fg, + u32 *match_value, + u8 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct fs_fte *fte; + struct mlx5_flow_rule *dst; + struct mlx5_flow_table *ft; + struct list_head *prev; + char fte_name[20]; + + mutex_lock(&fg->base.lock); + fs_for_each_fte(fte, fg) { + /* TODO: Check of size against PRM max size */ + mutex_lock(&fte->base.lock); + if (fs_match_exact_val(&fg->mask, match_value, &fte->val) && + action == fte->action && flow_tag == fte->flow_tag) { + dst = _fs_add_dst_fte(fte, fg, dest); + mutex_unlock(&fte->base.lock); + if (IS_ERR(dst)) + goto unlock_fg; + goto add_rule; + } + mutex_unlock(&fte->base.lock); + } + + fs_get_parent(ft, fg); + if (fg->num_ftes == fg->max_ftes) { + dst = 
ERR_PTR(-ENOSPC); + goto unlock_fg; + } + + fte = fs_create_fte(fg, match_value, action, flow_tag, &prev); + if (IS_ERR(fte)) { + dst = (void *)fte; + goto unlock_fg; + } + dst = _fs_add_dst_fte(fte, fg, dest); + if (IS_ERR(dst)) { + kfree(fte); + goto unlock_fg; + } + + fg->num_ftes++; + + snprintf(fte_name, sizeof(fte_name), "fte%u", fte->index); + /* Add node to tree */ + fs_add_node(&fte->base, &fg->base, fte_name, 0); + list_add(&fte->base.list, prev); +add_rule: + add_rule_to_tree(dst, fte); +unlock_fg: + mutex_unlock(&fg->base.lock); + return dst; +} + +static struct mlx5_flow_rule *fs_add_dst_ft(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u8 action, u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + /*? where dst_entry is allocated*/ + struct mlx5_flow_group *g; + struct mlx5_flow_rule *dst; + + fs_get(&ft->base); + mutex_lock(&ft->base.lock); + fs_for_each_fg(g, ft) + if (fs_match_exact_mask(g->mask.match_criteria_enable, + match_criteria_enable, + g->mask.match_criteria, + match_criteria)) { + mutex_unlock(&ft->base.lock); + + dst = fs_add_dst_fg(g, match_value, + action, flow_tag, dest); + if (PTR_ERR(dst) && PTR_ERR(dst) != -ENOSPC) + goto unlock; + } + mutex_unlock(&ft->base.lock); + + g = create_autogroup(ft, match_criteria_enable, match_criteria); + if (IS_ERR(g)) { + dst = (void *)g; + goto unlock; + } + + dst = fs_add_dst_fg(g, match_value, + action, flow_tag, dest); + if (IS_ERR(dst)) { + /* Remove assumes refcount > 0 and autogroup creates a group + * with a refcount = 0. 
+ */ + fs_get(&g->base); + fs_remove_node(&g->base); + goto unlock; + } + +unlock: + fs_put(&ft->base); + return dst; +} + +struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *dst; + struct mlx5_flow_namespace *ns; + + ns = get_ns_with_notifiers(&ft->base); + if (ns) + down_read(&ns->dests_rw_sem); + dst = fs_add_dst_ft(ft, match_criteria_enable, match_criteria, + match_value, action, flow_tag, dest); + if (ns) + up_read(&ns->dests_rw_sem); + + return dst; + + +} +EXPORT_SYMBOL(mlx5_add_flow_rule); + +void mlx5_del_flow_rule(struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_namespace *ns; + + ns = get_ns_with_notifiers(&dst->base); + if (ns) + down_read(&ns->dests_rw_sem); + fs_remove_node(&dst->base); + if (ns) + up_read(&ns->dests_rw_sem); +} +EXPORT_SYMBOL(mlx5_del_flow_rule); + +#define MLX5_CORE_FS_ROOT_NS_NAME "root" +#define MLX5_CORE_FS_ESW_EGRESS_ACL "esw_egress_root" +#define MLX5_CORE_FS_ESW_INGRESS_ACL "esw_ingress_root" +#define MLX5_CORE_FS_FDB_ROOT_NS_NAME "fdb_root" +#define MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME "sniffer_rx_root" +#define MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME "sniffer_tx_root" +#define MLX5_CORE_FS_PRIO_MAX_FT 4 +#define MLX5_CORE_FS_PRIO_MAX_NS 1 + +static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned prio, int max_ft, + const char *name, u8 flags) +{ + struct fs_prio *fs_prio; + + fs_prio = kzalloc(sizeof(*fs_prio), GFP_KERNEL); + if (!fs_prio) + return ERR_PTR(-ENOMEM); + + fs_prio->base.type = FS_TYPE_PRIO; + fs_add_node(&fs_prio->base, &ns->base, name, 1); + fs_prio->max_ft = max_ft; + fs_prio->max_ns = MLX5_CORE_FS_PRIO_MAX_NS; + fs_prio->prio = prio; + fs_prio->flags = flags; + list_add_tail(&fs_prio->base.list, &ns->prios); + INIT_LIST_HEAD(&fs_prio->objs); + mutex_init(&fs_prio->shared_lock); + + return fs_prio; +} 
+
+/*
+ * Tear down the NIC RX root namespace of dev bottom-up: first the prios of
+ * nested namespaces, then the objects of the top-level prios, then the
+ * top-level prios and finally the root node. Clears dev->root_ns.
+ */
+static void cleanup_root_ns(struct mlx5_core_dev *dev)
+{
+	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
+	struct fs_prio *iter_prio;
+
+	if (!root_ns)
+		return;
+
+	/* stage 1 */
+	fs_for_each_prio(iter_prio, &root_ns->ns) {
+		struct mlx5_flow_namespace *iter_ns;
+
+		fs_for_each_ns(iter_ns, iter_prio) {
+			while (!list_empty(&iter_ns->prios)) {
+				struct fs_base *iter_prio2 =
+					list_first_entry(&iter_ns->prios,
+							 struct fs_base,
+							 list);
+
+				fs_remove_node(iter_prio2);
+			}
+		}
+	}
+
+	/* stage 2 */
+	fs_for_each_prio(iter_prio, &root_ns->ns) {
+		while (!list_empty(&iter_prio->objs)) {
+			struct fs_base *iter_ns =
+				list_first_entry(&iter_prio->objs,
+						 struct fs_base,
+						 list);
+
+				fs_remove_node(iter_ns);
+		}
+	}
+	/* stage 3 */
+	while (!list_empty(&root_ns->ns.prios)) {
+		struct fs_base *iter_prio =
+			list_first_entry(&root_ns->ns.prios,
+					 struct fs_base,
+					 list);
+
+		fs_remove_node(iter_prio);
+	}
+
+	fs_remove_node(&root_ns->ns.base);
+	dev->root_ns = NULL;
+}
+
+/*
+ * Tear down a root namespace that holds at most one prio: remove the prio
+ * (if present) and then the root node. A NULL root_ns is ignored.
+ * root_ns is passed by value, so the caller is responsible for clearing
+ * its own pointer afterwards.
+ */
+static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev,
+					struct mlx5_flow_root_namespace *root_ns)
+{
+	struct fs_base *prio;
+
+	if (!root_ns)
+		return;
+
+	if (!list_empty(&root_ns->ns.prios)) {
+		prio = list_first_entry(&root_ns->ns.prios,
+					struct fs_base,
+					list);
+		fs_remove_node(prio);
+	}
+	fs_remove_node(&root_ns->ns.base);
+}
+
+/*
+ * Tear down every root namespace of dev and clear the corresponding
+ * pointers, so later lookups see NULL instead of a removed namespace.
+ */
+void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
+{
+	cleanup_root_ns(dev);
+	cleanup_single_prio_root_ns(dev, dev->sniffer_rx_root_ns);
+	dev->sniffer_rx_root_ns = NULL;
+	cleanup_single_prio_root_ns(dev, dev->sniffer_tx_root_ns);
+	dev->sniffer_tx_root_ns = NULL;
+	cleanup_single_prio_root_ns(dev, dev->fdb_root_ns);
+	dev->fdb_root_ns = NULL;
+	cleanup_single_prio_root_ns(dev, dev->esw_egress_root_ns);
+	dev->esw_egress_root_ns = NULL;
+	cleanup_single_prio_root_ns(dev, dev->esw_ingress_root_ns);
+	dev->esw_ingress_root_ns = NULL;
+}
+
+/* Initialise the type, semaphores and lists of ns; returns ns. */
+static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
+						 *ns)
+{
+	ns->base.type = FS_TYPE_NAMESPACE;
+	init_rwsem(&ns->dests_rw_sem);
+	init_rwsem(&ns->notifiers_rw_sem);
+	INIT_LIST_HEAD(&ns->prios);
+	INIT_LIST_HEAD(&ns->list_notifiers);
+
+	return ns;
+}
+
+/*
+ * Allocate a root namespace for table_type and register it as a parentless
+ * tree node called name. Returns NULL when the allocation fails.
+ */
+static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev,
+							  enum fs_ft_type
+							  table_type,
+							  char *name)
+{
+	struct mlx5_flow_root_namespace *root_ns;
+	struct mlx5_flow_namespace *ns;
+
+	/* create the root namespace */
+	root_ns = mlx5_vzalloc(sizeof(*root_ns));
+	if (!root_ns)
+		goto err;
+
+	root_ns->dev = dev;
+	root_ns->table_type = table_type;
+	mutex_init(&root_ns->fs_chain_lock);
+
+	ns = &root_ns->ns;
+	fs_init_namespace(ns);
+	fs_add_node(&ns->base, NULL, name, 1);
+
+	return root_ns;
+err:
+	return NULL;
+}
+
+/* Create the FDB root namespace with a single prio holding one table. */
+static int init_fdb_root_ns(struct mlx5_core_dev *dev)
+{
+	struct fs_prio *prio;
+
+	dev->fdb_root_ns = create_root_ns(dev, FS_FT_FDB,
+					  MLX5_CORE_FS_FDB_ROOT_NS_NAME);
+	if (!dev->fdb_root_ns)
+		return -ENOMEM;
+
+	/* create 1 prio*/
+	prio = fs_create_prio(&dev->fdb_root_ns->ns, 0, 1, "fdb_prio", 0);
+	if (IS_ERR(prio))
+		return PTR_ERR(prio);
+	else
+		return 0;
+}
+
+#define MAX_VPORTS 128
+
+/* Create the e-switch egress ACL root namespace (one prio, MAX_VPORTS tables). */
+static int init_egress_acl_root_ns(struct mlx5_core_dev *dev)
+{
+	struct fs_prio *prio;
+
+	dev->esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL,
+						 MLX5_CORE_FS_ESW_EGRESS_ACL);
+	if (!dev->esw_egress_root_ns)
+		return -ENOMEM;
+
+	/* create 1 prio*/
+	prio = fs_create_prio(&dev->esw_egress_root_ns->ns, 0, MAX_VPORTS,
+			      "esw_egress_prio", 0);
+	if (IS_ERR(prio))
+		return PTR_ERR(prio);
+	else
+		return 0;
+}
+
+/* Create the e-switch ingress ACL root namespace (one prio, MAX_VPORTS tables). */
+static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev)
+{
+	struct fs_prio *prio;
+
+	dev->esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL,
+						  MLX5_CORE_FS_ESW_INGRESS_ACL);
+	if (!dev->esw_ingress_root_ns)
+		return -ENOMEM;
+
+	/* create 1 prio*/
+	prio = fs_create_prio(&dev->esw_ingress_root_ns->ns, 0, MAX_VPORTS,
+			      "esw_ingress_prio", 0);
+	if (IS_ERR(prio))
+		return PTR_ERR(prio);
+	else
+		return 0;
+}
+
+/* Create the sniffer RX root namespace with a single prio holding one table. */
+static int init_sniffer_rx_root_ns(struct mlx5_core_dev *dev)
+{
+	struct fs_prio *prio;
+
+	dev->sniffer_rx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_RX,
+				     MLX5_CORE_FS_SNIFFER_RX_ROOT_NS_NAME);
+	if (!dev->sniffer_rx_root_ns)
+		return  -ENOMEM;
+
+	/* create 1 prio*/
+	prio = fs_create_prio(&dev->sniffer_rx_root_ns->ns, 0, 1,
+			      "sniffer_prio", 0);
+	if (IS_ERR(prio))
+		return PTR_ERR(prio);
+	else
+		return 0;
+}
+
+
+/* Create the sniffer TX root namespace with a single prio holding one table. */
+static int init_sniffer_tx_root_ns(struct mlx5_core_dev *dev)
+{
+	struct fs_prio *prio;
+
+	dev->sniffer_tx_root_ns = create_root_ns(dev, FS_FT_SNIFFER_TX,
+						 MLX5_CORE_FS_SNIFFER_TX_ROOT_NS_NAME);
+	if (!dev->sniffer_tx_root_ns)
+		return  -ENOMEM;
+
+	/* create 1 prio*/
+	prio = fs_create_prio(&dev->sniffer_tx_root_ns->ns, 0, 1,
+			      "sniffer_prio", 0);
+	if (IS_ERR(prio))
+		return PTR_ERR(prio);
+	else
+		return 0;
+}
+
+/*
+ * Allocate a namespace named name and append it to prio->objs.
+ * Returns the namespace or ERR_PTR(-ENOMEM).
+ */
+static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio,
+						       const char *name)
+{
+	struct mlx5_flow_namespace	*ns;
+
+	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+	if (!ns)
+		return ERR_PTR(-ENOMEM);
+
+	fs_init_namespace(ns);
+	fs_add_node(&ns->base, &prio->base, name, 1);
+	list_add_tail(&ns->base.list, &prio->objs);
+
+	return ns;
+}
+
+#define FLOW_TABLE_BIT_SZ 1
+/*
+ * Read the single capability bit at bit offset `offset` of the current
+ * flow-table capabilities of dev (big-endian 32-bit words).
+ */
+#define GET_FLOW_TABLE_CAP(dev, offset) \
+	((be32_to_cpu(*((__be32 *)((dev)->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) +	\
+			(offset) / 32)) >>					\
+	  (32 - FLOW_TABLE_BIT_SZ - ((offset) & 0x1f))) & FLOW_TABLE_BIT_SZ)
+
+/* True when every capability bit listed in caps is set for dev. */
+static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps)
+{
+	int i;
+
+	for (i = 0; i < caps->arr_sz; i++) {
+		if (!GET_FLOW_TABLE_CAP(dev, caps->caps[i]))
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Recursively instantiate the static tree description node under
+ * base_parent. A prio node is silently skipped (with its children) when
+ * its min_ft_level exceeds max_ft_level or a required capability is
+ * missing. Returns 0, -EINVAL for an unknown node type, or the error from
+ * creating a prio/namespace.
+ */
+static int _init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
+		    struct init_tree_node *node, struct fs_base *base_parent,
+		    struct init_tree_node *tree_parent)
+{
+	struct mlx5_flow_namespace *fs_ns;
+	struct fs_prio *fs_prio;
+	int priority;
+	struct fs_base *base;
+	int i;
+	int err = 0;
+
+	if (node->type == FS_TYPE_PRIO) {
+		if ((node->min_ft_level > max_ft_level) ||
+		    !has_required_caps(dev, &node->caps))
+			goto out;
+
+		fs_get_obj(fs_ns, base_parent);
+		/* priority is the node's position among its siblings */
+		priority = node - tree_parent->children;
+		fs_prio = fs_create_prio(fs_ns, priority,
+					 node->max_ft,
+					 node->name, node->flags);
+		if (IS_ERR(fs_prio)) {
+			err = PTR_ERR(fs_prio);
+			goto out;
+		}
+		base = &fs_prio->base;
+	} else if (node->type == FS_TYPE_NAMESPACE) {
+		fs_get_obj(fs_prio, base_parent);
+		fs_ns = fs_create_namespace(fs_prio, node->name);
+		if (IS_ERR(fs_ns)) {
+			err = PTR_ERR(fs_ns);
+			goto out;
+		}
+		base = &fs_ns->base;
+	} else {
+		return -EINVAL;
+	}
+	for (i = 0; i < node->ar_size; i++) {
+		err = _init_root_tree(dev, max_ft_level, &node->children[i], base,
+				      node);
+		if (err)
+			break;
+	}
+out:
+	return err;
+}
+
+/*
+ * Instantiate all children of the static description node under the
+ * namespace parent; stops at the first error.
+ */
+static int init_root_tree(struct mlx5_core_dev *dev, int max_ft_level,
+		   struct init_tree_node *node, struct fs_base *parent)
+{
+	int i;
+	struct mlx5_flow_namespace *fs_ns;
+	int err = 0;
+
+	fs_get_obj(fs_ns, parent);
+	for (i = 0; i < node->ar_size; i++) {
+		err = _init_root_tree(dev, max_ft_level,
+				      &node->children[i], &fs_ns->base, node);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+static int sum_max_ft_in_prio(struct fs_prio *prio);
+/* Sum of sum_max_ft_in_prio() over all prios of ns. */
+static int sum_max_ft_in_ns(struct mlx5_flow_namespace *ns)
+{
+	struct fs_prio *prio;
+	int sum = 0;
+
+	fs_for_each_prio(prio, ns) {
+		sum += sum_max_ft_in_prio(prio);
+	}
+	return  sum;
+}
+
+/*
+ * Return prio->max_ft when it is already set; otherwise compute it as the
+ * sum over the namespaces below prio, store it in prio->max_ft and return
+ * it.
+ */
+static int sum_max_ft_in_prio(struct fs_prio *prio)
+{
+	int sum = 0;
+	struct fs_base *it;
+	struct mlx5_flow_namespace	*ns;
+
+	if (prio->max_ft)
+		return prio->max_ft;
+
+	fs_for_each_ns_or_ft(it, prio) {
+		if (it->type == FS_TYPE_FLOW_TABLE)
+			continue;
+
+		fs_get_obj(ns, it);
+		sum += sum_max_ft_in_ns(ns);
+	}
+	prio->max_ft = sum;
+	return sum;
+}
+
+/* Fill in max_ft for every prio of ns that does not have one yet. */
+static void set_max_ft(struct mlx5_flow_namespace *ns)
+{
+	struct fs_prio *prio;
+
+	if (!ns)
+		return;
+
+	fs_for_each_prio(prio, ns)
+		sum_max_ft_in_prio(prio);
+}
+
+/*
+ * Create the NIC RX root namespace and populate it from the static root_fs
+ * description. Returns 0 or -ENOMEM on any failure.
+ */
+static int init_root_ns(struct mlx5_core_dev *dev)
+{
+	int max_ft_level = MLX5_CAP_FLOWTABLE(dev,
+					      flow_table_properties_nic_receive.
+					      max_ft_level);
+
+	dev->root_ns = create_root_ns(dev, FS_FT_NIC_RX,
+				      MLX5_CORE_FS_ROOT_NS_NAME);
+	if (IS_ERR_OR_NULL(dev->root_ns))
+		goto err;
+
+
+	if (init_root_tree(dev, max_ft_level, &root_fs, &dev->root_ns->ns.base))
+		goto err;
+
+	set_max_ft(&dev->root_ns->ns);
+
+	return 0;
+err:
+	return -ENOMEM;
+}
+
+/* Return the match_criteria_enable bits of the flow group that owns rule. */
+u8 mlx5_get_match_criteria_enable(struct mlx5_flow_rule *rule)
+{
+	struct fs_base *pbase;
+	struct mlx5_flow_group *fg;
+
+	/* rule -> fte -> flow group */
+	pbase = rule->base.parent;
+	WARN_ON(!pbase);
+	pbase = pbase->parent;
+	WARN_ON(!pbase);
+
+	fs_get_obj(fg, pbase);
+	return fg->mask.match_criteria_enable;
+}
+
+/* Copy the match value of rule's fte into match_value. */
+void mlx5_get_match_value(u32 *match_value,
+			  struct mlx5_flow_rule *rule)
+{
+	struct fs_base *pbase;
+	struct fs_fte *fte;
+
+	pbase = rule->base.parent;
+	WARN_ON(!pbase);
+	fs_get_obj(fte, pbase);
+
+	memcpy(match_value, fte->val, sizeof(fte->val));
+}
+
+/* Copy the match criteria of the flow group that owns rule into match_criteria. */
+void mlx5_get_match_criteria(u32 *match_criteria,
+			     struct mlx5_flow_rule *rule)
+{
+	struct fs_base *pbase;
+	struct mlx5_flow_group *fg;
+
+	/* rule -> fte -> flow group */
+	pbase = rule->base.parent;
+	WARN_ON(!pbase);
+	pbase = pbase->parent;
+	WARN_ON(!pbase);
+
+	fs_get_obj(fg, pbase);
+	memcpy(match_criteria, &fg->mask.match_criteria,
+	       sizeof(fg->mask.match_criteria));
+}
+
+/*
+ * Create all flow-steering root namespaces of dev. The NIC RX tree is only
+ * created when the nic_flow_table capability is set. On any failure
+ * everything created so far is torn down and the error is returned.
+ */
+int mlx5_init_fs(struct mlx5_core_dev *dev)
+{
+	int err;
+
+	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
+		err = init_root_ns(dev);
+		if (err)
+			goto err;
+	}
+
+	err = init_fdb_root_ns(dev);
+	if (err)
+		goto err;
+
+	err = init_egress_acl_root_ns(dev);
+	if (err)
+		goto err;
+
+	err = init_ingress_acl_root_ns(dev);
+	if (err)
+		goto err;
+
+	err = init_sniffer_tx_root_ns(dev);
+	if (err)
+		goto err;
+
+	err = init_sniffer_rx_root_ns(dev);
+	if (err)
+		goto err;
+
+	return 0;
+err:
+	mlx5_cleanup_fs(dev);
+	return err;
+}
+
+/*
+ * Look up the namespace of the given type.
+ * BYPASS, KERNEL and LEFTOVERS map to prios 0, 1 and 2 of the NIC RX root
+ * and yield the first namespace of that prio; the remaining types return
+ * the namespace of their dedicated root.
+ * Returns NULL when the type is unknown or the namespace does not exist.
+ */
+struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
+						  enum mlx5_flow_namespace_type type)
+{
+	struct mlx5_flow_root_namespace *root_ns = dev->root_ns;
+	int prio;
+	struct fs_prio *fs_prio;
+	struct mlx5_flow_namespace *ns;
+
+	switch (type) {
+	case MLX5_FLOW_NAMESPACE_BYPASS:
+		prio = 0;
+		break;
+	case MLX5_FLOW_NAMESPACE_KERNEL:
+		prio = 1;
+		break;
+	case MLX5_FLOW_NAMESPACE_LEFTOVERS:
+		prio = 2;
+		break;
+	case MLX5_FLOW_NAMESPACE_FDB:
+		if (dev->fdb_root_ns)
+			return &dev->fdb_root_ns->ns;
+		else
+			return NULL;
+	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
+		if (dev->esw_egress_root_ns)
+			return &dev->esw_egress_root_ns->ns;
+		else
+			return NULL;
+	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
+		if (dev->esw_ingress_root_ns)
+			return &dev->esw_ingress_root_ns->ns;
+		else
+			return NULL;
+	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
+		if (dev->sniffer_rx_root_ns)
+			return &dev->sniffer_rx_root_ns->ns;
+		else
+			return NULL;
+	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
+		if (dev->sniffer_tx_root_ns)
+			return &dev->sniffer_tx_root_ns->ns;
+		else
+			return NULL;
+	default:
+		return NULL;
+	}
+
+	if (!root_ns)
+		return NULL;
+
+	fs_prio = find_prio(&root_ns->ns, prio);
+	if (!fs_prio)
+		return NULL;
+
+	/* list_first_entry() is only meaningful on a non-empty list */
+	if (list_empty(&fs_prio->objs))
+		return NULL;
+
+	ns = list_first_entry(&fs_prio->objs,
+			      typeof(*ns),
+			      base.list);
+
+	return ns;
+}
+EXPORT_SYMBOL(mlx5_get_flow_namespace);
+
+
+/*
+ * Attach client_data to rule on behalf of fs_handler, replacing the data
+ * stored for that handler if an entry already exists.
+ * Returns 0 or -ENOMEM.
+ */
+int mlx5_set_rule_private_data(struct mlx5_flow_rule *rule,
+				  struct mlx5_flow_handler *fs_handler,
+				  void  *client_data)
+{
+	struct fs_client_priv_data *priv_data;
+
+	mutex_lock(&rule->clients_lock);
+	/* Check whether the handler already exists in the list */
+	list_for_each_entry(priv_data, &rule->clients_data, list) {
+		if (priv_data->fs_handler == fs_handler) {
+			priv_data->client_dst_data = client_data;
+			goto unlock;
+		}
+	}
+	priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL);
+	if (!priv_data) {
+		mutex_unlock(&rule->clients_lock);
+		return -ENOMEM;
+	}
+
+	priv_data->client_dst_data = client_data;
+	priv_data->fs_handler = fs_handler;
+	list_add(&priv_data->list, &rule->clients_data);
+
+unlock:
+	mutex_unlock(&rule->clients_lock);
+
+	return 0;
+}
+
+/*
+ * rule_event_fn callback: free the private-data entry that the handler
+ * passed as context has on rule, if any. Always returns 0.
+ */
+static int remove_from_clients(struct mlx5_flow_rule *rule,
+			bool ctx_changed,
+			void *client_data,
+			void *context)
+{
+	struct fs_client_priv_data *iter_client;
+	struct fs_client_priv_data *temp_client;
+	struct mlx5_flow_handler *handler = (struct
+						mlx5_flow_handler*)context;
+
+	mutex_lock(&rule->clients_lock);
+	list_for_each_entry_safe(iter_client, temp_client,
+				 &rule->clients_data, list) {
+		if (iter_client->fs_handler == handler) {
+			list_del(&iter_client->list);
+			kfree(iter_client);
+			break;
+		}
+	}
+	mutex_unlock(&rule->clients_lock);
+
+	return 0;
+}
+
+/*
+ * Register add/delete rule callbacks on the namespace of type ns_type.
+ * Returns the handler, ERR_PTR(-EINVAL) when the namespace does not exist
+ * or ERR_PTR(-ENOMEM).
+ */
+struct mlx5_flow_handler *mlx5_register_rule_notifier(struct mlx5_core_dev *dev,
+								enum mlx5_flow_namespace_type ns_type,
+								rule_event_fn add_cb,
+								rule_event_fn del_cb,
+								void *context)
+{
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_handler *handler;
+
+	ns = mlx5_get_flow_namespace(dev, ns_type);
+	if (!ns)
+		return ERR_PTR(-EINVAL);
+
+	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+	if (!handler)
+		return ERR_PTR(-ENOMEM);
+
+	handler->add_dst_cb = add_cb;
+	handler->del_dst_cb = del_cb;
+	handler->client_context = context;
+	handler->ns = ns;
+	down_write(&ns->notifiers_rw_sem);
+	list_add_tail(&handler->list, &ns->list_notifiers);
+	up_write(&ns->notifiers_rw_sem);
+
+	return handler;
+}
+
+static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
+				rule_event_fn add_rule_cb,
+				void *context);
+
+/*
+ * Unregister handler: drop its private data from every rule in its
+ * namespace, unlink it from the notifier list and free it.
+ */
+void mlx5_unregister_rule_notifier(struct mlx5_flow_handler *handler)
+{
+	struct mlx5_flow_namespace *ns = handler->ns;
+
+	/*Remove from dst's clients*/
+	down_write(&ns->dests_rw_sem);
+	down_write(&ns->notifiers_rw_sem);
+	iterate_rules_in_ns(ns, remove_from_clients, handler);
+	list_del(&handler->list);
+	up_write(&ns->notifiers_rw_sem);
+	up_write(&ns->dests_rw_sem);
+	kfree(handler);
+}
+
+/*
+ * Call add_rule_cb for every rule in ft, holding the table, group and fte
+ * locks while descending. is_new_rule is true only for the first rule of
+ * each fte. Iteration stops at the first callback error.
+ */
+static void iterate_rules_in_ft(struct mlx5_flow_table *ft,
+				rule_event_fn add_rule_cb,
+				void *context)
+{
+	struct mlx5_flow_group *iter_fg;
+	struct fs_fte *iter_fte;
+	struct mlx5_flow_rule *iter_rule;
+	int err = 0;
+	bool is_new_rule;
+
+	mutex_lock(&ft->base.lock);
+	fs_for_each_fg(iter_fg, ft) {
+		mutex_lock(&iter_fg->base.lock);
+		fs_for_each_fte(iter_fte, iter_fg) {
+			mutex_lock(&iter_fte->base.lock);
+			is_new_rule = true;
+			fs_for_each_dst(iter_rule, iter_fte) {
+				fs_get(&iter_rule->base);
+				err = add_rule_cb(iter_rule,
+						 is_new_rule,
+						 NULL,
+						 context);
+				fs_put_parent_locked(&iter_rule->base);
+				if (err)
+					break;
+				is_new_rule = false;
+			}
+			mutex_unlock(&iter_fte->base.lock);
+			if (err)
+				break;
+		}
+		mutex_unlock(&iter_fg->base.lock);
+		if (err)
+			break;
+	}
+	mutex_unlock(&ft->base.lock);
+}
+
+static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
+				rule_event_fn add_rule_cb,
+				void *context);
+
+/*
+ * Call add_rule_cb for every rule below prio (flow tables directly,
+ * namespaces recursively) while holding prio's lock.
+ */
+static void iterate_rules_in_prio(struct fs_prio *prio,
+				  rule_event_fn add_rule_cb,
+				  void *context)
+{
+	struct fs_base *it;
+
+	mutex_lock(&prio->base.lock);
+	fs_for_each_ns_or_ft(it, prio) {
+		if (it->type == FS_TYPE_FLOW_TABLE) {
+			struct mlx5_flow_table	      *ft;
+
+			fs_get_obj(ft, it);
+			iterate_rules_in_ft(ft, add_rule_cb, context);
+		} else {
+			struct mlx5_flow_namespace *ns;
+
+			fs_get_obj(ns, it);
+			iterate_rules_in_ns(ns, add_rule_cb, context);
+		}
+	}
+	mutex_unlock(&prio->base.lock);
+}
+
+/* Call add_rule_cb for every rule below ns while holding ns's lock. */
+static void iterate_rules_in_ns(struct mlx5_flow_namespace *ns,
+				rule_event_fn add_rule_cb,
+				void *context)
+{
+	struct fs_prio *iter_prio;
+
+	mutex_lock(&ns->base.lock);
+	fs_for_each_prio(iter_prio, ns) {
+		iterate_rules_in_prio(iter_prio, add_rule_cb, context);
+	}
+	mutex_unlock(&ns->base.lock);
+}
+
+/*
+ * Call add_rule_cb for every rule that already exists in ns, with
+ * dests_rw_sem held for writing and notifiers_rw_sem held for reading.
+ */
+void mlx5_flow_iterate_existing_rules(struct mlx5_flow_namespace *ns,
+					 rule_event_fn add_rule_cb,
+					 void *context)
+{
+	down_write(&ns->dests_rw_sem);
+	down_read(&ns->notifiers_rw_sem);
+	iterate_rules_in_ns(ns, add_rule_cb, context);
+	up_read(&ns->notifiers_rw_sem);
+	up_write(&ns->dests_rw_sem);
+}
+
+
+void mlx5_del_flow_rules_list(struct mlx5_flow_rules_list *rules_list)
+{
+	struct mlx5_flow_rule_node *iter_node;
+	struct mlx5_flow_rule_node *temp_node;
+
+	list_for_each_entry_safe(iter_node, temp_node,
&rules_list->head, list) { + list_del(&iter_node->list); + kfree(iter_node); + } + + kfree(rules_list); +} + +#define ROCEV1_ETHERTYPE 0x8915 +static int set_rocev1_rules(struct list_head *rules_list) +{ + struct mlx5_flow_rule_node *rocev1_rule; + + rocev1_rule = kzalloc(sizeof(*rocev1_rule), GFP_KERNEL); + if (!rocev1_rule) + return -ENOMEM; + + rocev1_rule->match_criteria_enable = + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS; + MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_criteria, ethertype, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, rocev1_rule->match_value, ethertype, + ROCEV1_ETHERTYPE); + + list_add_tail(&rocev1_rule->list, rules_list); + + return 0; +} + +#define ROCEV2_UDP_PORT 4791 +static int set_rocev2_rules(struct list_head *rules_list) +{ + struct mlx5_flow_rule_node *ipv4_rule; + struct mlx5_flow_rule_node *ipv6_rule; + + ipv4_rule = kzalloc(sizeof(*ipv4_rule), GFP_KERNEL); + if (!ipv4_rule) + return -ENOMEM; + + ipv6_rule = kzalloc(sizeof(*ipv6_rule), GFP_KERNEL); + if (!ipv6_rule) { + kfree(ipv4_rule); + return -ENOMEM; + } + + ipv4_rule->match_criteria_enable = + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS; + MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ethertype, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ethertype, + 0x0800); + MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, ip_protocol, + IPPROTO_UDP); + MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_criteria, udp_dport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, ipv4_rule->match_value, udp_dport, + ROCEV2_UDP_PORT); + + ipv6_rule->match_criteria_enable = + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS; + MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ethertype, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ethertype, + 0x86dd); + 
MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, ip_protocol, + IPPROTO_UDP); + MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_criteria, udp_dport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, ipv6_rule->match_value, udp_dport, + ROCEV2_UDP_PORT); + + list_add_tail(&ipv4_rule->list, rules_list); + list_add_tail(&ipv6_rule->list, rules_list); + + return 0; +} + + +struct mlx5_flow_rules_list *get_roce_flow_rules(u8 roce_mode) +{ + int err = 0; + struct mlx5_flow_rules_list *rules_list = + kzalloc(sizeof(*rules_list), GFP_KERNEL); + + if (!rules_list) + return NULL; + + INIT_LIST_HEAD(&rules_list->head); + + if (roce_mode & MLX5_ROCE_VERSION_1_CAP) { + err = set_rocev1_rules(&rules_list->head); + if (err) + goto free_list; + } + if (roce_mode & MLX5_ROCE_VERSION_2_CAP) + err = set_rocev2_rules(&rules_list->head); + if (err) + goto free_list; + + return rules_list; + +free_list: + mlx5_del_flow_rules_list(rules_list); + return NULL; +} Index: sys/dev/mlx5/mlx5_core/mlx5_main.c =================================================================== --- sys/dev/mlx5/mlx5_core/mlx5_main.c +++ sys/dev/mlx5/mlx5_core/mlx5_main.c @@ -42,6 +42,7 @@ #include #include #include "mlx5_core.h" +#include "fs_core.h" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -794,8 +795,21 @@ mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); + err = mlx5_init_fs(dev); + if (err) { + mlx5_core_err(dev, "flow steering init %d\n", err); + goto err_init_tables; + } + return 0; +err_init_tables: + mlx5_cleanup_mr_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + unmap_bf_area(dev); + err_stop_eqs: mlx5_stop_eqs(dev); @@ -848,6 +862,7 @@ { struct mlx5_priv *priv = &dev->priv; + mlx5_cleanup_fs(dev); mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); @@ -1060,6 +1075,12 @@ 
/*
 * PCI shutdown callback, installed as .shutdown in mlx5_core_driver.
 *
 * Its only action is to clear bus mastering on the device.
 */
static void shutdown_one(struct pci_dev *pdev)
{
	/* prevent device from accessing host memory after shutdown */
	pci_clear_master(pdev);
}
=================================================================== --- sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c +++ sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c @@ -28,7 +28,9 @@ #include "en.h" #include -#include +#include + +#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) enum { MLX5E_FULLMATCH = 0, @@ -97,28 +99,38 @@ mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, struct mlx5e_eth_addr_info *ai) { - void *ft = priv->ft.main; + if (ai->tt_vec & (1 << MLX5E_TT_IPV6_IPSEC_ESP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV4_IPSEC_ESP)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV6_IPSEC_AH)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]); + + if (ai->tt_vec & (1 << MLX5E_TT_IPV4_IPSEC_AH)) + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]); if (ai->tt_vec & (1 << MLX5E_TT_IPV6_TCP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_TCP]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_TCP]); if (ai->tt_vec & (1 << MLX5E_TT_IPV4_TCP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_TCP]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_TCP]); if (ai->tt_vec & (1 << MLX5E_TT_IPV6_UDP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6_UDP]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_UDP]); if (ai->tt_vec & (1 << MLX5E_TT_IPV4_UDP)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4_UDP]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_UDP]); if (ai->tt_vec & (1 << MLX5E_TT_IPV6)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV6]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6]); if (ai->tt_vec & (1 << MLX5E_TT_IPV4)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_IPV4]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4]); if (ai->tt_vec & (1 << MLX5E_TT_ANY)) - mlx5_del_flow_table_entry(ft, ai->ft_ix[MLX5E_TT_ANY]); + mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_ANY]); } 
static int @@ -213,42 +225,33 @@ static int mlx5e_add_eth_addr_rule_sub(struct mlx5e_priv *priv, struct mlx5e_eth_addr_info *ai, int type, - void *flow_context, void *match_criteria) -{ - u8 match_criteria_enable = 0; - void *match_value; - void *dest; - u8 *dmac; - u8 *match_criteria_dmac; - void *ft = priv->ft.main; + u32 *mc, u32 *mv) +{ + struct mlx5_flow_destination dest; + u8 mc_enable = 0; + struct mlx5_flow_rule **rule_p; + struct mlx5_flow_table *ft = priv->fts.main.t; + u8 *mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers.dmac_47_16); + u8 *mv_dmac = MLX5_ADDR_OF(fte_match_param, mv, + outer_headers.dmac_47_16); u32 *tirn = priv->tirn; u32 tt_vec; - int err; + int err = 0; - match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); - dmac = MLX5_ADDR_OF(fte_match_param, match_value, - outer_headers.dmac_47_16); - match_criteria_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, - outer_headers.dmac_47_16); - dest = MLX5_ADDR_OF(flow_context, flow_context, destination); - - MLX5_SET(flow_context, flow_context, action, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); - MLX5_SET(flow_context, flow_context, destination_list_size, 1); - MLX5_SET(dest_format_struct, dest, destination_type, - MLX5_FLOW_CONTEXT_DEST_TYPE_TIR); + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; switch (type) { case MLX5E_FULLMATCH: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - memset(match_criteria_dmac, 0xff, ETH_ALEN); - ether_addr_copy(dmac, ai->addr); + mc_enable = MLX5_MATCH_OUTER_HEADERS; + memset(mc_dmac, 0xff, ETH_ALEN); + ether_addr_copy(mv_dmac, ai->addr); break; case MLX5E_ALLMULTI: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - match_criteria_dmac[0] = 0x01; - dmac[0] = 0x01; + mc_enable = MLX5_MATCH_OUTER_HEADERS; + mc_dmac[0] = 0x01; + mv_dmac[0] = 0x01; break; case MLX5E_PROMISC: @@ -259,134 +262,192 @@ tt_vec = mlx5e_get_tt_vec(ai, type); - if (tt_vec & (1 << MLX5E_TT_ANY)) { - MLX5_SET(dest_format_struct, dest, destination_id, - 
tirn[MLX5E_TT_ANY]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_ANY]); - if (err) { - mlx5e_del_eth_addr_from_flow_table(priv, ai); - return (err); - } - ai->tt_vec |= (1 << MLX5E_TT_ANY); - } - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.ethertype); - - if (tt_vec & (1 << MLX5E_TT_IPV4)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETHERTYPE_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV4]); - if (err) { - mlx5e_del_eth_addr_from_flow_table(priv, ai); - return (err); - } - ai->tt_vec |= (1 << MLX5E_TT_IPV4); - } - if (tt_vec & (1 << MLX5E_TT_IPV6)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETHERTYPE_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV6]); - if (err) { - mlx5e_del_eth_addr_from_flow_table(priv, ai); - return (err); - } - ai->tt_vec |= (1 << MLX5E_TT_IPV6); - } - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.ip_protocol); - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_UDP); - - if (tt_vec & (1 << MLX5E_TT_IPV4_UDP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETHERTYPE_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_UDP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV4_UDP]); - if (err) { - mlx5e_del_eth_addr_from_flow_table(priv, ai); - return (err); - } - ai->tt_vec |= (1 << MLX5E_TT_IPV4_UDP); - } - if (tt_vec & (1 << MLX5E_TT_IPV6_UDP)) { - MLX5_SET(fte_match_param, match_value, 
outer_headers.ethertype, - ETHERTYPE_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_UDP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV6_UDP]); - if (err) { - mlx5e_del_eth_addr_from_flow_table(priv, ai); - return (err); - } - ai->tt_vec |= (1 << MLX5E_TT_IPV6_UDP); - } - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, - IPPROTO_TCP); - - if (tt_vec & (1 << MLX5E_TT_IPV4_TCP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETHERTYPE_IP); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV4_TCP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV4_TCP]); - if (err) { - mlx5e_del_eth_addr_from_flow_table(priv, ai); - return (err); - } - ai->tt_vec |= (1 << MLX5E_TT_IPV4_TCP); - } - if (tt_vec & (1 << MLX5E_TT_IPV6_TCP)) { - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, - ETHERTYPE_IPV6); - MLX5_SET(dest_format_struct, dest, destination_id, - tirn[MLX5E_TT_IPV6_TCP]); - err = mlx5_add_flow_table_entry(ft, match_criteria_enable, - match_criteria, flow_context, &ai->ft_ix[MLX5E_TT_IPV6_TCP]); - if (err) { - mlx5e_del_eth_addr_from_flow_table(priv, ai); - return (err); - } - ai->tt_vec |= (1 << MLX5E_TT_IPV6_TCP); + if (tt_vec & BIT(MLX5E_TT_ANY)) { + rule_p = &ai->ft_rule[MLX5E_TT_ANY]; + dest.tir_num = tirn[MLX5E_TT_ANY]; + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_ETH_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_ANY); } - return (0); + + mc_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + + if (tt_vec & BIT(MLX5E_TT_IPV4)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4]; + dest.tir_num = tirn[MLX5E_TT_IPV4]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + 
ETHERTYPE_IP); + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_ETH_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6]; + dest.tir_num = tirn[MLX5E_TT_IPV6]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETHERTYPE_IPV6); + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_ETH_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6); + } + + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_UDP); + + if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_UDP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETHERTYPE_IP); + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_ETH_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_UDP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETHERTYPE_IPV6); + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_ETH_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP); + } + + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_TCP); + + if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_TCP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETHERTYPE_IP); + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 
MLX5_FS_ETH_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_TCP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETHERTYPE_IPV6); + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_ETH_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + + ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP); + } + + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_AH); + + if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]; + dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_AH]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETHERTYPE_IP); + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_ETH_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]; + dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_AH]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETHERTYPE_IPV6); + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_ETH_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH); + } + + MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_ESP); + + if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]; + dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_ESP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETHERTYPE_IP); + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_ETH_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= 
BIT(MLX5E_TT_IPV4_IPSEC_ESP); + } + + if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) { + rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]; + dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_ESP]; + MLX5_SET(fte_match_param, mv, outer_headers.ethertype, + ETHERTYPE_IPV6); + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_ETH_FLOW_TAG, &dest); + if (IS_ERR_OR_NULL(*rule_p)) + goto err_del_ai; + ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP); + } + + return 0; + +err_del_ai: + err = PTR_ERR(*rule_p); + *rule_p = NULL; + mlx5e_del_eth_addr_from_flow_table(priv, ai); + + return err; } static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, struct mlx5e_eth_addr_info *ai, int type) { - u32 *flow_context; u32 *match_criteria; - int err; + u32 *match_value; + int err = 0; - flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + - MLX5_ST_SZ_BYTES(dest_format_struct)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!flow_context || !match_criteria) { + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { if_printf(priv->ifp, "%s: alloc failed\n", __func__); err = -ENOMEM; goto add_eth_addr_rule_out; } - err = mlx5e_add_eth_addr_rule_sub(priv, ai, type, flow_context, - match_criteria); - if (err) - if_printf(priv->ifp, "%s: failed\n", __func__); + err = mlx5e_add_eth_addr_rule_sub(priv, ai, type, match_criteria, + match_value); add_eth_addr_rule_out: kvfree(match_criteria); - kvfree(flow_context); + kvfree(match_value); + return (err); } @@ -435,106 +496,156 @@ enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, - MLX5E_VLAN_RULE_TYPE_ANY_VID, + MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, MLX5E_VLAN_RULE_TYPE_MATCH_VID, }; static int -mlx5e_add_vlan_rule(struct mlx5e_priv *priv, - enum mlx5e_vlan_rule_type rule_type, u16 vid) 
+mlx5e_add_vlan_rule_sub(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid, + u32 *mc, u32 *mv) { - u8 match_criteria_enable = 0; - u32 *flow_context; - void *match_value; - void *dest; - u32 *match_criteria; - u32 *ft_ix; - int err; + struct mlx5_flow_table *ft = priv->fts.vlan.t; + struct mlx5_flow_destination dest; + u8 mc_enable = 0; + struct mlx5_flow_rule **rule_p; + int err = 0; - flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + - MLX5_ST_SZ_BYTES(dest_format_struct)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!flow_context || !match_criteria) { - if_printf(priv->ifp, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_vlan_rule_out; - } - match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); - dest = MLX5_ADDR_OF(flow_context, flow_context, destination); - - MLX5_SET(flow_context, flow_context, action, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); - MLX5_SET(flow_context, flow_context, destination_list_size, 1); - MLX5_SET(dest_format_struct, dest, destination_type, - MLX5_FLOW_CONTEXT_DEST_TYPE_FLOW_TABLE); - MLX5_SET(dest_format_struct, dest, destination_id, - mlx5_get_flow_table_id(priv->ft.main)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fts.main.t; - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.cvlan_tag); + mc_enable = MLX5_MATCH_OUTER_HEADERS; switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - ft_ix = &priv->vlan.untagged_rule_ft_ix; + rule_p = &priv->vlan.untagged_ft_rule; + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); break; - case MLX5E_VLAN_RULE_TYPE_ANY_VID: - ft_ix = &priv->vlan.any_vlan_rule_ft_ix; - MLX5_SET(fte_match_param, match_value, outer_headers.cvlan_tag, - 1); + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + rule_p = &priv->vlan.any_cvlan_ft_rule; + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + 
MLX5_SET(fte_match_param, mv, outer_headers.cvlan_tag, 1); break; - default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ - ft_ix = &priv->vlan.active_vlans_ft_ix[vid]; - MLX5_SET(fte_match_param, match_value, outer_headers.cvlan_tag, - 1); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - outer_headers.first_vid); - MLX5_SET(fte_match_param, match_value, outer_headers.first_vid, - vid); + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + rule_p = &priv->vlan.any_svlan_ft_rule; + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET(fte_match_param, mv, outer_headers.svlan_tag, 1); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ + rule_p = &priv->vlan.active_vlans_ft_rule[vid]; + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, mv, outer_headers.cvlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); + MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid); mlx5e_vport_context_update_vlans(priv); break; } - err = mlx5_add_flow_table_entry(priv->ft.vlan, match_criteria_enable, - match_criteria, flow_context, ft_ix); - if (err) - if_printf(priv->ifp, "%s: failed\n", __func__); + *rule_p = mlx5_add_flow_rule(ft, mc_enable, mc, mv, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_ETH_FLOW_TAG, + &dest); + + if (IS_ERR(*rule_p)) { + err = PTR_ERR(*rule_p); + *rule_p = NULL; + if_printf(priv->ifp, "%s: add rule failed\n", __func__); + } + + return (err); +} + +static int +mlx5e_add_vlan_rule(struct mlx5e_priv *priv, + enum mlx5e_vlan_rule_type rule_type, u16 vid) +{ + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + if_printf(priv->ifp, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_vlan_rule_out; + } + + err = mlx5e_add_vlan_rule_sub(priv, rule_type, vid, match_criteria, + match_value); 
add_vlan_rule_out: kvfree(match_criteria); - kvfree(flow_context); + kvfree(match_value); + return (err); } + static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid) { switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.untagged_rule_ft_ix); + if (priv->vlan.untagged_ft_rule) { + mlx5_del_flow_rule(priv->vlan.untagged_ft_rule); + priv->vlan.untagged_ft_rule = NULL; + } break; - case MLX5E_VLAN_RULE_TYPE_ANY_VID: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.any_vlan_rule_ft_ix); + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + if (priv->vlan.any_cvlan_ft_rule) { + mlx5_del_flow_rule(priv->vlan.any_cvlan_ft_rule); + priv->vlan.any_cvlan_ft_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + if (priv->vlan.any_svlan_ft_rule) { + mlx5_del_flow_rule(priv->vlan.any_svlan_ft_rule); + priv->vlan.any_svlan_ft_rule = NULL; + } break; case MLX5E_VLAN_RULE_TYPE_MATCH_VID: - mlx5_del_flow_table_entry(priv->ft.vlan, - priv->vlan.active_vlans_ft_ix[vid]); + if (priv->vlan.active_vlans_ft_rule[vid]) { + mlx5_del_flow_rule(priv->vlan.active_vlans_ft_rule[vid]); + priv->vlan.active_vlans_ft_rule[vid] = NULL; + } mlx5e_vport_context_update_vlans(priv); break; + default: + break; } } +static void +mlx5e_del_any_vid_rules(struct mlx5e_priv *priv) +{ + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); +} + +static int +mlx5e_add_any_vid_rules(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + if (err) + return (err); + + return (mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0)); +} + void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) { if (priv->vlan.filter_disabled) { priv->vlan.filter_disabled = false; + if (priv->ifp->if_flags & IFF_PROMISC) + return; if (test_bit(MLX5E_STATE_OPENED, 
&priv->state)) - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, - 0); + mlx5e_del_any_vid_rules(priv); } } @@ -543,9 +654,10 @@ { if (!priv->vlan.filter_disabled) { priv->vlan.filter_disabled = true; + if (priv->ifp->if_flags & IFF_PROMISC) + return; if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, - 0); + mlx5e_add_any_vid_rules(priv); } } @@ -558,8 +670,8 @@ return; PRIV_LOCK(priv); - set_bit(vid, priv->vlan.active_vlans); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_and_set_bit(vid, priv->vlan.active_vlans) && + test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); PRIV_UNLOCK(priv); } @@ -582,12 +694,12 @@ int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv) { - u16 vid; int err; + int i; - for_each_set_bit(vid, priv->vlan.active_vlans, VLAN_N_VID) { + for_each_set_bit(i, priv->vlan.active_vlans, VLAN_N_VID) { err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, - vid); + i); if (err) return (err); } @@ -597,8 +709,7 @@ return (err); if (priv->vlan.filter_disabled) { - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, - 0); + err = mlx5e_add_any_vid_rules(priv); if (err) return (err); } @@ -608,15 +719,15 @@ void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv) { - u16 vid; + int i; if (priv->vlan.filter_disabled) - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + mlx5e_del_any_vid_rules(priv); mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - for_each_set_bit(vid, priv->vlan.active_vlans, VLAN_N_VID) - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + for_each_set_bit(i, priv->vlan.active_vlans, VLAN_N_VID) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); } #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ @@ -812,8 +923,11 @@ ether_addr_copy(priv->eth_addr.broadcast.addr, priv->ifp->if_broadcastaddr); - if (enable_promisc) + if 
(enable_promisc) { mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (!priv->vlan.filter_disabled) + mlx5e_add_any_vid_rules(priv); + } if (enable_allmulti) mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); if (enable_broadcast) @@ -825,8 +939,11 @@ mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); if (disable_allmulti) mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); - if (disable_promisc) + if (disable_promisc) { + if (!priv->vlan.filter_disabled) + mlx5e_del_any_vid_rules(priv); mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); + } ea->promisc_enabled = promisc_enabled; ea->allmulti_enabled = allmulti_enabled; @@ -847,127 +964,487 @@ PRIV_UNLOCK(priv); } -static int -mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +static void +mlx5e_destroy_groups(struct mlx5e_flow_table *ft) { - struct mlx5_flow_table_group *g; - u8 *dmac; - - g = malloc(9 * sizeof(*g), M_MLX5EN, M_WAITOK | M_ZERO); - if (g == NULL) - return (-ENOMEM); + int i; - g[0].log_sz = 2; - g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.ip_protocol); + for (i = ft->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ft->g[i])) + mlx5_destroy_flow_group(ft->g[i]); + ft->g[i] = NULL; + } + ft->num_groups = 0; +} - g[1].log_sz = 1; - g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, - outer_headers.ethertype); +static void +mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) +{ + mlx5e_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; +} - g[2].log_sz = 0; +#define MLX5E_NUM_MAIN_GROUPS 10 +#define MLX5E_MAIN_GROUP0_SIZE BIT(4) +#define MLX5E_MAIN_GROUP1_SIZE BIT(3) +#define MLX5E_MAIN_GROUP2_SIZE BIT(1) +#define MLX5E_MAIN_GROUP3_SIZE BIT(0) +#define MLX5E_MAIN_GROUP4_SIZE BIT(14) 
+#define MLX5E_MAIN_GROUP5_SIZE BIT(13) +#define MLX5E_MAIN_GROUP6_SIZE BIT(11) +#define MLX5E_MAIN_GROUP7_SIZE BIT(2) +#define MLX5E_MAIN_GROUP8_SIZE BIT(1) +#define MLX5E_MAIN_GROUP9_SIZE BIT(0) +#define MLX5E_MAIN_TABLE_SIZE (MLX5E_MAIN_GROUP0_SIZE +\ + MLX5E_MAIN_GROUP1_SIZE +\ + MLX5E_MAIN_GROUP2_SIZE +\ + MLX5E_MAIN_GROUP3_SIZE +\ + MLX5E_MAIN_GROUP4_SIZE +\ + MLX5E_MAIN_GROUP5_SIZE +\ + MLX5E_MAIN_GROUP6_SIZE +\ + MLX5E_MAIN_GROUP7_SIZE +\ + MLX5E_MAIN_GROUP8_SIZE +\ + MLX5E_MAIN_GROUP9_SIZE +\ + 0) - g[3].log_sz = 14; - g[3].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[3].match_criteria, - outer_headers.dmac_47_16); +static int +mlx5e_create_main_groups_sub(struct mlx5e_flow_table *ft, u32 *in, + int inlen) +{ + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + u8 *dmac = MLX5_ADDR_OF(create_flow_group_in, in, + match_criteria.outer_headers.dmac_47_16); + int err; + int ix = 0; + + /* Tunnel rules need to be first in this list of groups */ + + /* Start tunnel rules */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.udp_dport); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP0_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + /* End Tunnel Rules */ + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + 
ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); memset(dmac, 0xff, ETH_ALEN); - MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[3].match_criteria, - outer_headers.ip_protocol); - - g[4].log_sz = 13; - g[4].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[4].match_criteria, - outer_headers.dmac_47_16); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP4_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); memset(dmac, 0xff, ETH_ALEN); - MLX5_SET_TO_ONES(fte_match_param, g[4].match_criteria, - outer_headers.ethertype); - - 
g[5].log_sz = 11; - g[5].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[5].match_criteria, - outer_headers.dmac_47_16); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP5_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); memset(dmac, 0xff, ETH_ALEN); - - g[6].log_sz = 2; - g[6].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[6].match_criteria, - outer_headers.dmac_47_16); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP6_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); dmac[0] = 0x01; - MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, - outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, g[6].match_criteria, - outer_headers.ip_protocol); - - g[7].log_sz = 1; - g[7].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[7].match_criteria, - outer_headers.dmac_47_16); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP7_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); dmac[0] = 
0x01; - MLX5_SET_TO_ONES(fte_match_param, g[7].match_criteria, - outer_headers.ethertype); - - g[8].log_sz = 0; - g[8].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - dmac = MLX5_ADDR_OF(fte_match_param, g[8].match_criteria, - outer_headers.dmac_47_16); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP8_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); dmac[0] = 0x01; - priv->ft.main = mlx5_create_flow_table(priv->mdev, 1, - MLX5_FLOW_TABLE_TYPE_NIC_RCV, - 0, 9, g); - free(g, M_MLX5EN); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_MAIN_GROUP9_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + return (0); + +err_destory_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); - return (priv->ft.main ? 
0 : -ENOMEM); + return (err); } -static void -mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) +static int +mlx5e_create_main_groups(struct mlx5e_flow_table *ft) +{ + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return (-ENOMEM); + + err = mlx5e_create_main_groups_sub(ft, in, inlen); + + kvfree(in); + return (err); +} + +static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fts.main; + int err; + + ft->num_groups = 0; + ft->t = mlx5_create_flow_table(priv->fts.ns, 0, "main", + MLX5E_MAIN_TABLE_SIZE); + + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return (err); + } + ft->g = kcalloc(MLX5E_NUM_MAIN_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_main_flow_table; + } + + err = mlx5e_create_main_groups(ft); + if (err) + goto err_free_g; + return (0); + +err_free_g: + kfree(ft->g); + +err_destroy_main_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return (err); +} + +static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) { - mlx5_destroy_flow_table(priv->ft.main); - priv->ft.main = NULL; + mlx5e_destroy_flow_table(&priv->fts.main); } +#define MLX5E_NUM_VLAN_GROUPS 3 +#define MLX5E_VLAN_GROUP0_SIZE BIT(12) +#define MLX5E_VLAN_GROUP1_SIZE BIT(1) +#define MLX5E_VLAN_GROUP2_SIZE BIT(0) +#define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ + MLX5E_VLAN_GROUP1_SIZE +\ + MLX5E_VLAN_GROUP2_SIZE +\ + 0) + static int -mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +mlx5e_create_vlan_groups_sub(struct mlx5e_flow_table *ft, u32 *in, + int inlen) +{ + int err; + int ix = 0; + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, 
outer_headers.first_vid); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP0_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + return (0); + +err_destory_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + + return (err); +} + +static int +mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft) { - struct mlx5_flow_table_group *g; + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; - g = malloc(2 * sizeof(*g), M_MLX5EN, M_WAITOK | M_ZERO); - if (g == NULL) + in = mlx5_vzalloc(inlen); + if (!in) return (-ENOMEM); - g[0].log_sz = 12; - g[0].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, g[0].match_criteria, - outer_headers.first_vid); + err = mlx5e_create_vlan_groups_sub(ft, in, inlen); + + kvfree(in); + return (err); +} + +static int +mlx5e_create_vlan_flow_table(struct 
mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fts.vlan; + int err; + + ft->num_groups = 0; + ft->t = mlx5_create_flow_table(priv->fts.ns, 0, "vlan", + MLX5E_VLAN_TABLE_SIZE); + + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return (err); + } + ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_vlan_flow_table; + } + + err = mlx5e_create_vlan_groups(ft); + if (err) + goto err_free_g; + + return (0); - /* untagged + any vlan id */ - g[1].log_sz = 1; - g[1].match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, g[1].match_criteria, - outer_headers.cvlan_tag); +err_free_g: + kfree(ft->g); - priv->ft.vlan = mlx5_create_flow_table(priv->mdev, 0, - MLX5_FLOW_TABLE_TYPE_NIC_RCV, - 0, 2, g); - free(g, M_MLX5EN); +err_destroy_vlan_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; - return (priv->ft.vlan ? 0 : -ENOMEM); + return (err); } static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) { - mlx5_destroy_flow_table(priv->ft.vlan); - priv->ft.vlan = NULL; + mlx5e_destroy_flow_table(&priv->fts.vlan); +} + +#define MLX5E_NUM_INNER_RSS_GROUPS 3 +#define MLX5E_INNER_RSS_GROUP0_SIZE BIT(3) +#define MLX5E_INNER_RSS_GROUP1_SIZE BIT(1) +#define MLX5E_INNER_RSS_GROUP2_SIZE BIT(0) +#define MLX5E_INNER_RSS_TABLE_SIZE (MLX5E_INNER_RSS_GROUP0_SIZE +\ + MLX5E_INNER_RSS_GROUP1_SIZE +\ + MLX5E_INNER_RSS_GROUP2_SIZE +\ + 0) + +static int +mlx5e_create_inner_rss_groups_sub(struct mlx5e_flow_table *ft, u32 *in, + int inlen) +{ + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + int err; + int ix = 0; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_RSS_GROUP0_SIZE; + MLX5_SET_CFG(in, 
end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ethertype); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_RSS_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_RSS_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destory_groups; + ft->num_groups++; + + return (0); + +err_destory_groups: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + mlx5e_destroy_groups(ft); + + return (err); +} + +static int +mlx5e_create_inner_rss_groups(struct mlx5e_flow_table *ft) +{ + u32 *in; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return (-ENOMEM); + + err = mlx5e_create_inner_rss_groups_sub(ft, in, inlen); + + kvfree(in); + return (err); +} + +static int +mlx5e_create_inner_rss_flow_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fts.inner_rss; + int err; + + ft->num_groups = 0; + ft->t = mlx5_create_flow_table(priv->fts.ns, 0, "inner_rss", + MLX5E_INNER_RSS_TABLE_SIZE); + + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return (err); + } + ft->g = kcalloc(MLX5E_NUM_INNER_RSS_GROUPS, sizeof(*ft->g), + GFP_KERNEL); + if (!ft->g) { + err = -ENOMEM; + goto err_destroy_inner_rss_flow_table; + } + + err = mlx5e_create_inner_rss_groups(ft); + if (err) + goto err_free_g; + + return (0); + +err_free_g: + kfree(ft->g); + 
+err_destroy_inner_rss_flow_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return (err); +} + +static void mlx5e_destroy_inner_rss_flow_table(struct mlx5e_priv *priv) +{ + mlx5e_destroy_flow_table(&priv->fts.inner_rss); } int @@ -975,11 +1452,18 @@ { int err; - err = mlx5e_create_main_flow_table(priv); + priv->fts.ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + + err = mlx5e_create_vlan_flow_table(priv); if (err) return (err); - err = mlx5e_create_vlan_flow_table(priv); + err = mlx5e_create_main_flow_table(priv); + if (err) + goto err_destroy_vlan_flow_table; + + err = mlx5e_create_inner_rss_flow_table(priv); if (err) goto err_destroy_main_flow_table; @@ -987,6 +1471,8 @@ err_destroy_main_flow_table: mlx5e_destroy_main_flow_table(priv); +err_destroy_vlan_flow_table: + mlx5e_destroy_vlan_flow_table(priv); return (err); } @@ -994,6 +1480,7 @@ void mlx5e_close_flow_table(struct mlx5e_priv *priv) { - mlx5e_destroy_vlan_flow_table(priv); + mlx5e_destroy_inner_rss_flow_table(priv); mlx5e_destroy_main_flow_table(priv); + mlx5e_destroy_vlan_flow_table(priv); } Index: sys/dev/mlx5/mlx5_en/mlx5_en_main.c =================================================================== --- sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -30,7 +30,7 @@ #include #include -#define ETH_DRIVER_VERSION "3.1.0-dev" +#define ETH_DRIVER_VERSION "3.2-rc1" char mlx5e_version[] = "Mellanox Ethernet driver" " (" ETH_DRIVER_VERSION ")"; Index: sys/dev/mlx5/qp.h =================================================================== --- sys/dev/mlx5/qp.h +++ sys/dev/mlx5/qp.h @@ -66,6 +66,7 @@ MLX5_QP_OPTPAR_CQN_RCV = 1 << 19, MLX5_QP_OPTPAR_DC_HS = 1 << 20, MLX5_QP_OPTPAR_DC_KEY = 1 << 21, + }; enum mlx5_qp_state { Index: sys/modules/mlx5/Makefile =================================================================== --- sys/modules/mlx5/Makefile +++ sys/modules/mlx5/Makefile @@ -7,8 +7,8 @@ mlx5_cmd.c \ mlx5_cq.c \ mlx5_eq.c \ 
-mlx5_eswitch_vacl.c \ -mlx5_flow_table.c \ +mlx5_fs_cmd.c \ +mlx5_fs_tree.c \ mlx5_fw.c \ mlx5_health.c \ mlx5_mad.c \