Index: stable/9/sys/ofed/drivers/net/mlx4/cmd.c =================================================================== --- stable/9/sys/ofed/drivers/net/mlx4/cmd.c (revision 279733) +++ stable/9/sys/ofed/drivers/net/mlx4/cmd.c (revision 279734) @@ -1,2592 +1,2605 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include "mlx4.h" #include "fw.h" #define CMD_POLL_TOKEN 0xffff #define INBOX_MASK 0xffffffffffffff00ULL #define CMD_CHAN_VER 1 #define CMD_CHAN_IF_REV 1 enum { /* command completed successfully: */ CMD_STAT_OK = 0x00, /* Internal error (such as a bus error) occurred while processing command: */ CMD_STAT_INTERNAL_ERR = 0x01, /* Operation/command not supported or opcode modifier not supported: */ CMD_STAT_BAD_OP = 0x02, /* Parameter not supported or parameter out of range: */ CMD_STAT_BAD_PARAM = 0x03, /* System not enabled or bad system state: */ CMD_STAT_BAD_SYS_STATE = 0x04, /* Attempt to access reserved or unallocaterd resource: */ CMD_STAT_BAD_RESOURCE = 0x05, /* Requested resource is currently executing a command, or is otherwise busy: */ CMD_STAT_RESOURCE_BUSY = 0x06, /* Required capability exceeds device limits: */ CMD_STAT_EXCEED_LIM = 0x08, /* Resource is not in the appropriate state or ownership: */ CMD_STAT_BAD_RES_STATE = 0x09, /* Index out of range: */ CMD_STAT_BAD_INDEX = 0x0a, /* FW image corrupted: */ CMD_STAT_BAD_NVMEM = 0x0b, /* Error in ICM mapping (e.g. 
not enough auxiliary ICM pages to execute command): */ CMD_STAT_ICM_ERROR = 0x0c, /* Attempt to modify a QP/EE which is not in the presumed state: */ CMD_STAT_BAD_QP_STATE = 0x10, /* Bad segment parameters (Address/Size): */ CMD_STAT_BAD_SEG_PARAM = 0x20, /* Memory Region has Memory Windows bound to: */ CMD_STAT_REG_BOUND = 0x21, /* HCA local attached memory not present: */ CMD_STAT_LAM_NOT_PRE = 0x22, /* Bad management packet (silently discarded): */ CMD_STAT_BAD_PKT = 0x30, /* More outstanding CQEs in CQ than new CQ size: */ CMD_STAT_BAD_SIZE = 0x40, /* Multi Function device support required: */ CMD_STAT_MULTI_FUNC_REQ = 0x50, }; enum { HCR_IN_PARAM_OFFSET = 0x00, HCR_IN_MODIFIER_OFFSET = 0x08, HCR_OUT_PARAM_OFFSET = 0x0c, HCR_TOKEN_OFFSET = 0x14, HCR_STATUS_OFFSET = 0x18, HCR_OPMOD_SHIFT = 12, HCR_T_BIT = 21, HCR_E_BIT = 22, HCR_GO_BIT = 23 }; enum { GO_BIT_TIMEOUT_MSECS = 10000 }; enum mlx4_vlan_transition { MLX4_VLAN_TRANSITION_VST_VST = 0, MLX4_VLAN_TRANSITION_VST_VGT = 1, MLX4_VLAN_TRANSITION_VGT_VST = 2, MLX4_VLAN_TRANSITION_VGT_VGT = 3, }; struct mlx4_cmd_context { struct completion done; int result; int next; u64 out_param; u16 token; u8 fw_status; }; static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, struct mlx4_vhcr_cmd *in_vhcr); static int mlx4_status_to_errno(u8 status) { static const int trans_table[] = { [CMD_STAT_INTERNAL_ERR] = -EIO, [CMD_STAT_BAD_OP] = -EPERM, [CMD_STAT_BAD_PARAM] = -EINVAL, [CMD_STAT_BAD_SYS_STATE] = -ENXIO, [CMD_STAT_BAD_RESOURCE] = -EBADF, [CMD_STAT_RESOURCE_BUSY] = -EBUSY, [CMD_STAT_EXCEED_LIM] = -ENOMEM, [CMD_STAT_BAD_RES_STATE] = -EBADF, [CMD_STAT_BAD_INDEX] = -EBADF, [CMD_STAT_BAD_NVMEM] = -EFAULT, [CMD_STAT_ICM_ERROR] = -ENFILE, [CMD_STAT_BAD_QP_STATE] = -EINVAL, [CMD_STAT_BAD_SEG_PARAM] = -EFAULT, [CMD_STAT_REG_BOUND] = -EBUSY, [CMD_STAT_LAM_NOT_PRE] = -EAGAIN, [CMD_STAT_BAD_PKT] = -EINVAL, [CMD_STAT_BAD_SIZE] = -ENOMEM, [CMD_STAT_MULTI_FUNC_REQ] = -EACCES, }; if (status >= ARRAY_SIZE(trans_table) || 
(status != CMD_STAT_OK && trans_table[status] == 0)) return -EIO; return trans_table[status]; } static const char *cmd_to_str(u16 cmd) { switch (cmd) { case MLX4_CMD_SYS_EN: return "SYS_EN"; case MLX4_CMD_SYS_DIS: return "SYS_DIS"; case MLX4_CMD_MAP_FA: return "MAP_FA"; case MLX4_CMD_UNMAP_FA: return "UNMAP_FA"; case MLX4_CMD_RUN_FW: return "RUN_FW"; case MLX4_CMD_MOD_STAT_CFG: return "MOD_STAT_CFG"; case MLX4_CMD_QUERY_DEV_CAP: return "QUERY_DEV_CAP"; case MLX4_CMD_QUERY_FW: return "QUERY_FW"; case MLX4_CMD_ENABLE_LAM: return "ENABLE_LAM"; case MLX4_CMD_DISABLE_LAM: return "DISABLE_LAM"; case MLX4_CMD_QUERY_DDR: return "QUERY_DDR"; case MLX4_CMD_QUERY_ADAPTER: return "QUERY_ADAPTER"; case MLX4_CMD_INIT_HCA: return "INIT_HCA"; case MLX4_CMD_CLOSE_HCA: return "CLOSE_HCA"; case MLX4_CMD_INIT_PORT: return "INIT_PORT"; case MLX4_CMD_CLOSE_PORT: return "CLOSE_PORT"; case MLX4_CMD_QUERY_HCA: return "QUERY_HCA"; case MLX4_CMD_QUERY_PORT: return "QUERY_PORT"; case MLX4_CMD_SENSE_PORT: return "SENSE_PORT"; case MLX4_CMD_HW_HEALTH_CHECK: return "HW_HEALTH_CHECK"; case MLX4_CMD_SET_PORT: return "SET_PORT"; case MLX4_CMD_SET_NODE: return "SET_NODE"; case MLX4_CMD_QUERY_FUNC: return "QUERY_FUNC"; case MLX4_CMD_MAP_ICM: return "MAP_ICM"; case MLX4_CMD_UNMAP_ICM: return "UNMAP_ICM"; case MLX4_CMD_MAP_ICM_AUX: return "MAP_ICM_AUX"; case MLX4_CMD_UNMAP_ICM_AUX: return "UNMAP_ICM_AUX"; case MLX4_CMD_SET_ICM_SIZE: return "SET_ICM_SIZE"; /*master notify fw on finish for slave's flr*/ case MLX4_CMD_INFORM_FLR_DONE: return "INFORM_FLR_DONE"; case MLX4_CMD_GET_OP_REQ: return "GET_OP_REQ"; /* TPT commands */ case MLX4_CMD_SW2HW_MPT: return "SW2HW_MPT"; case MLX4_CMD_QUERY_MPT: return "QUERY_MPT"; case MLX4_CMD_HW2SW_MPT: return "HW2SW_MPT"; case MLX4_CMD_READ_MTT: return "READ_MTT"; case MLX4_CMD_WRITE_MTT: return "WRITE_MTT"; case MLX4_CMD_SYNC_TPT: return "SYNC_TPT"; /* EQ commands */ case MLX4_CMD_MAP_EQ: return "MAP_EQ"; case MLX4_CMD_SW2HW_EQ: return "SW2HW_EQ"; case 
MLX4_CMD_HW2SW_EQ: return "HW2SW_EQ";
	case MLX4_CMD_QUERY_EQ: return "QUERY_EQ";

	/*
	 * CQ commands.
	 * The names are bare mnemonics like every other entry in this
	 * table; callers append their own punctuation when logging.
	 */
	case MLX4_CMD_SW2HW_CQ: return "SW2HW_CQ";
	case MLX4_CMD_HW2SW_CQ: return "HW2SW_CQ";
	case MLX4_CMD_QUERY_CQ: return "QUERY_CQ";
	case MLX4_CMD_MODIFY_CQ: return "MODIFY_CQ";

	/* SRQ commands */
	case MLX4_CMD_SW2HW_SRQ: return "SW2HW_SRQ";
	case MLX4_CMD_HW2SW_SRQ: return "HW2SW_SRQ";
	case MLX4_CMD_QUERY_SRQ: return "QUERY_SRQ";
	case MLX4_CMD_ARM_SRQ: return "ARM_SRQ";

	/* QP/EE commands */
	case MLX4_CMD_RST2INIT_QP: return "RST2INIT_QP";
	case MLX4_CMD_INIT2RTR_QP: return "INIT2RTR_QP";
	case MLX4_CMD_RTR2RTS_QP: return "RTR2RTS_QP";
	case MLX4_CMD_RTS2RTS_QP: return "RTS2RTS_QP";
	case MLX4_CMD_SQERR2RTS_QP: return "SQERR2RTS_QP";
	case MLX4_CMD_2ERR_QP: return "2ERR_QP";
	case MLX4_CMD_RTS2SQD_QP: return "RTS2SQD_QP";
	case MLX4_CMD_SQD2SQD_QP: return "SQD2SQD_QP";
	case MLX4_CMD_SQD2RTS_QP: return "SQD2RTS_QP";
	case MLX4_CMD_2RST_QP: return "2RST_QP";
	case MLX4_CMD_QUERY_QP: return "QUERY_QP";
	case MLX4_CMD_INIT2INIT_QP: return "INIT2INIT_QP";
	case MLX4_CMD_SUSPEND_QP: return "SUSPEND_QP";
	case MLX4_CMD_UNSUSPEND_QP: return "UNSUSPEND_QP";

	/* special QP and management commands */
	case MLX4_CMD_CONF_SPECIAL_QP: return "CONF_SPECIAL_QP";
	case MLX4_CMD_MAD_IFC: return "MAD_IFC";

	/* multicast commands */
	case MLX4_CMD_READ_MCG: return "READ_MCG";
	case MLX4_CMD_WRITE_MCG: return "WRITE_MCG";
	case MLX4_CMD_MGID_HASH: return "MGID_HASH";

	/* miscellaneous commands */
	case MLX4_CMD_DIAG_RPRT: return "DIAG_RPRT";
	case MLX4_CMD_NOP: return "NOP";
	case MLX4_CMD_ACCESS_MEM: return "ACCESS_MEM";
	case MLX4_CMD_SET_VEP: return "SET_VEP";

	/* Ethernet specific commands */
	case MLX4_CMD_SET_VLAN_FLTR: return "SET_VLAN_FLTR";
	case MLX4_CMD_SET_MCAST_FLTR: return "SET_MCAST_FLTR";
	case MLX4_CMD_DUMP_ETH_STATS: return "DUMP_ETH_STATS";

	/* Communication channel commands */
	case MLX4_CMD_ARM_COMM_CHANNEL: return "ARM_COMM_CHANNEL";
	case MLX4_CMD_GEN_EQE: return "GEN_EQE";

	/* virtual commands */
case MLX4_CMD_ALLOC_RES: return "ALLOC_RES"; case MLX4_CMD_FREE_RES: return "FREE_RES"; case MLX4_CMD_MCAST_ATTACH: return "MCAST_ATTACH"; case MLX4_CMD_UCAST_ATTACH: return "UCAST_ATTACH"; case MLX4_CMD_PROMISC: return "PROMISC"; case MLX4_CMD_QUERY_FUNC_CAP: return "QUERY_FUNC_CAP"; case MLX4_CMD_QP_ATTACH: return "QP_ATTACH"; /* debug commands */ case MLX4_CMD_QUERY_DEBUG_MSG: return "QUERY_DEBUG_MSG"; case MLX4_CMD_SET_DEBUG_MSG: return "SET_DEBUG_MSG"; /* statistics commands */ case MLX4_CMD_QUERY_IF_STAT: return "QUERY_IF_STAT"; case MLX4_CMD_SET_IF_STAT: return "SET_IF_STAT"; /* register/delete flow steering network rules */ case MLX4_QP_FLOW_STEERING_ATTACH: return "QP_FLOW_STEERING_ATTACH"; case MLX4_QP_FLOW_STEERING_DETACH: return "QP_FLOW_STEERING_DETACH"; case MLX4_FLOW_STEERING_IB_UC_QP_RANGE: return "FLOW_STEERING_IB_UC_QP_RANGE"; default: return "OTHER"; } } static u8 mlx4_errno_to_status(int errno) { switch (errno) { case -EPERM: return CMD_STAT_BAD_OP; case -EINVAL: return CMD_STAT_BAD_PARAM; case -ENXIO: return CMD_STAT_BAD_SYS_STATE; case -EBUSY: return CMD_STAT_RESOURCE_BUSY; case -ENOMEM: return CMD_STAT_EXCEED_LIM; case -ENFILE: return CMD_STAT_ICM_ERROR; default: return CMD_STAT_INTERNAL_ERR; } } static int comm_pending(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); u32 status = readl(&priv->mfunc.comm->slave_read); return (swab32(status) >> 31) != priv->cmd.comm_toggle; } static void mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) { struct mlx4_priv *priv = mlx4_priv(dev); u32 val; priv->cmd.comm_toggle ^= 1; val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31); __raw_writel((__force u32) cpu_to_be32(val), &priv->mfunc.comm->slave_write); mmiowb(); } static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, unsigned long timeout) { struct mlx4_priv *priv = mlx4_priv(dev); unsigned long end; int err = 0; int ret_from_pending = 0; /* First, verify that the master reports correct status */ if 
(comm_pending(dev)) { mlx4_warn(dev, "Communication channel is not idle." "my toggle is %d (cmd:0x%x)\n", priv->cmd.comm_toggle, cmd); return -EAGAIN; } /* Write command */ down(&priv->cmd.poll_sem); mlx4_comm_cmd_post(dev, cmd, param); end = msecs_to_jiffies(timeout) + jiffies; while (comm_pending(dev) && time_before(jiffies, end)) cond_resched(); ret_from_pending = comm_pending(dev); if (ret_from_pending) { /* check if the slave is trying to boot in the middle of * FLR process. The only non-zero result in the RESET command * is MLX4_DELAY_RESET_SLAVE*/ if ((MLX4_COMM_CMD_RESET == cmd)) { mlx4_warn(dev, "Got slave FLRed from Communication" " channel (ret:0x%x)\n", ret_from_pending); err = MLX4_DELAY_RESET_SLAVE; } else { mlx4_warn(dev, "Communication channel timed out\n"); err = -ETIMEDOUT; } } up(&priv->cmd.poll_sem); return err; } static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, u16 param, unsigned long timeout) { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_context *context; unsigned long end; int err = 0; down(&cmd->event_sem); end = msecs_to_jiffies(timeout) + jiffies; while (comm_pending(dev) && time_before(jiffies, end)) cond_resched(); if (comm_pending(dev)) { mlx4_warn(dev, "mlx4_comm_cmd_wait: Comm channel " "is not idle. 
My toggle is %d (op: 0x%x)\n", mlx4_priv(dev)->cmd.comm_toggle, op); up(&cmd->event_sem); return -EAGAIN; } spin_lock(&cmd->context_lock); BUG_ON(cmd->free_head < 0); context = &cmd->context[cmd->free_head]; context->token += cmd->token_mask + 1; cmd->free_head = context->next; spin_unlock(&cmd->context_lock); init_completion(&context->done); mlx4_comm_cmd_post(dev, op, param); /* In slave, wait unconditionally for completion */ wait_for_completion(&context->done); err = context->result; if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); goto out; } out: /* wait for comm channel ready * this is necessary for prevention the race * when switching between event to polling mode */ end = msecs_to_jiffies(timeout) + jiffies; while (comm_pending(dev) && time_before(jiffies, end)) cond_resched(); spin_lock(&cmd->context_lock); context->next = cmd->free_head; cmd->free_head = context - cmd->context; spin_unlock(&cmd->context_lock); up(&cmd->event_sem); return err; } int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, unsigned long timeout) { if (mlx4_priv(dev)->cmd.use_events) return mlx4_comm_cmd_wait(dev, cmd, param, timeout); return mlx4_comm_cmd_poll(dev, cmd, param, timeout); } static int cmd_pending(struct mlx4_dev *dev) { u32 status; if (pci_channel_offline(dev->pdev)) return -EIO; status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); return (status & swab32(1 << HCR_GO_BIT)) || (mlx4_priv(dev)->cmd.toggle == !!(status & swab32(1 << HCR_T_BIT))); } static int get_status(struct mlx4_dev *dev, u32 *status, int *go_bit, int *t_bit) { if (pci_channel_offline(dev->pdev)) return -EIO; *status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); *t_bit = !!(*status & swab32(1 << HCR_T_BIT)); *go_bit = !!(*status & swab32(1 << HCR_GO_BIT)); return 0; } static int mlx4_cmd_post(struct mlx4_dev *dev, struct timespec *ts1, u64 in_param, u64 out_param, u32 in_modifier, u8 
op_modifier, u16 op, u16 token, int event) { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; u32 __iomem *hcr = cmd->hcr; int ret = -EAGAIN; unsigned long end; int err, go_bit = 0, t_bit = 0; u32 status = 0; mutex_lock(&cmd->hcr_mutex); if (pci_channel_offline(dev->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ ret = -EIO; goto out; } end = jiffies; if (event) end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS); while (cmd_pending(dev)) { if (pci_channel_offline(dev->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ ret = -EIO; goto out; } if (time_after_eq(jiffies, end)) { mlx4_err(dev, "%s:cmd_pending failed\n", __func__); goto out; } cond_resched(); } /* * We use writel (instead of something like memcpy_toio) * because writes of less than 32 bits to the HCR don't work * (and some architectures such as ia64 implement memcpy_toio * in terms of writeb). */ __raw_writel((__force u32) cpu_to_be32(in_param >> 32), hcr + 0); __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), hcr + 1); __raw_writel((__force u32) cpu_to_be32(in_modifier), hcr + 2); __raw_writel((__force u32) cpu_to_be32(out_param >> 32), hcr + 3); __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), hcr + 4); __raw_writel((__force u32) cpu_to_be32(token << 16), hcr + 5); if (ts1) ktime_get_ts(ts1); /* __raw_writel may not order writes. */ wmb(); __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | (cmd->toggle << HCR_T_BIT) | (event ? (1 << HCR_E_BIT) : 0) | (op_modifier << HCR_OPMOD_SHIFT) | op), hcr + 6); /* * Make sure that our HCR writes don't get mixed in with * writes from another CPU starting a FW command. 
*/ mmiowb(); cmd->toggle = cmd->toggle ^ 1; ret = 0; out: if (ret) { err = get_status(dev, &status, &go_bit, &t_bit); mlx4_warn(dev, "Could not post command %s (0x%x): ret=%d, " "in_param=0x%llx, in_mod=0x%x, op_mod=0x%x, " "get_status err=%d, status_reg=0x%x, go_bit=%d, " "t_bit=%d, toggle=0x%x\n", cmd_to_str(op), op, ret, (unsigned long long) in_param, in_modifier, op_modifier, err, status, go_bit, t_bit, cmd->toggle); } mutex_unlock(&cmd->hcr_mutex); return ret; } static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr; int ret; mutex_lock(&priv->cmd.slave_cmd_mutex); vhcr->in_param = cpu_to_be64(in_param); vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0; vhcr->in_modifier = cpu_to_be32(in_modifier); vhcr->opcode = cpu_to_be16((((u16) op_modifier) << 12) | (op & 0xfff)); vhcr->token = cpu_to_be16(CMD_POLL_TOKEN); vhcr->status = 0; vhcr->flags = !!(priv->cmd.use_events) << 6; if (mlx4_is_master(dev)) { ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr); if (!ret) { if (out_is_imm) { if (out_param) *out_param = be64_to_cpu(vhcr->out_param); else { mlx4_err(dev, "response expected while" "output mailbox is NULL for " "command 0x%x\n", op); vhcr->status = CMD_STAT_BAD_PARAM; } } ret = mlx4_status_to_errno(vhcr->status); } } else { ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, MLX4_COMM_TIME + timeout); if (!ret) { if (out_is_imm) { if (out_param) *out_param = be64_to_cpu(vhcr->out_param); else { mlx4_err(dev, "response expected while" "output mailbox is NULL for " "command 0x%x\n", op); vhcr->status = CMD_STAT_BAD_PARAM; } } ret = mlx4_status_to_errno(vhcr->status); } else mlx4_err(dev, "failed execution of VHCR_POST command" "opcode %s (0x%x)\n", cmd_to_str(op), op); } mutex_unlock(&priv->cmd.slave_cmd_mutex); return ret; } static int 
mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout) { struct mlx4_priv *priv = mlx4_priv(dev); void __iomem *hcr = priv->cmd.hcr; int err = 0; unsigned long end; u32 stat; down(&priv->cmd.poll_sem); if (pci_channel_offline(dev->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ err = -EIO; goto out; } err = mlx4_cmd_post(dev, NULL, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); if (err) goto out; end = msecs_to_jiffies(timeout) + jiffies; while (cmd_pending(dev) && time_before(jiffies, end)) { if (pci_channel_offline(dev->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ err = -EIO; goto out; } cond_resched(); } if (cmd_pending(dev)) { mlx4_warn(dev, "command %s (0x%x) timed out (go bit not cleared)\n", cmd_to_str(op), op); err = -ETIMEDOUT; goto out; } if (out_is_imm) *out_param = (u64) be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 | (u64) be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4)); stat = be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24; err = mlx4_status_to_errno(stat); if (err) mlx4_err(dev, "command %s (0x%x) failed: fw status = 0x%x\n", cmd_to_str(op), op, stat); out: up(&priv->cmd.poll_sem); return err; } void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_context *context = &priv->cmd.context[token & priv->cmd.token_mask]; /* previously timed out command completing at long last */ if (token != context->token) return; context->fw_status = status; context->result = mlx4_status_to_errno(status); context->out_param = out_param; complete(&context->done); } static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, 
unsigned long timeout) { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_context *context; int err = 0; int go_bit = 0, t_bit = 0, stat_err; u32 status = 0; struct timespec ts1, ts2; ktime_t t1, t2, delta; s64 ds; if (out_is_imm && !out_param) return -EINVAL; down(&cmd->event_sem); spin_lock(&cmd->context_lock); BUG_ON(cmd->free_head < 0); context = &cmd->context[cmd->free_head]; context->token += cmd->token_mask + 1; cmd->free_head = context->next; spin_unlock(&cmd->context_lock); init_completion(&context->done); err = mlx4_cmd_post(dev, &ts1, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, context->token, 1); if (err) goto out; if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { stat_err = get_status(dev, &status, &go_bit, &t_bit); mlx4_warn(dev, "command %s (0x%x) timed out: in_param=0x%llx, " "in_mod=0x%x, op_mod=0x%x, get_status err=%d, " "status_reg=0x%x, go_bit=%d, t_bit=%d, toggle=0x%x\n" , cmd_to_str(op), op, (unsigned long long) in_param, in_modifier, op_modifier, stat_err, status, go_bit, t_bit, mlx4_priv(dev)->cmd.toggle); err = -EBUSY; goto out; } if (mlx4_debug_level & MLX4_DEBUG_MASK_CMD_TIME) { ktime_get_ts(&ts2); t1 = timespec_to_ktime(ts1); t2 = timespec_to_ktime(ts2); delta = ktime_sub(t2, t1); ds = ktime_to_ns(delta); pr_info("mlx4: fw exec time for %s is %lld nsec\n", cmd_to_str(op), (long long) ds); } err = context->result; if (err) { mlx4_err(dev, "command %s (0x%x) failed: in_param=0x%llx, " "in_mod=0x%x, op_mod=0x%x, fw status = 0x%x\n", cmd_to_str(op), op, (unsigned long long) in_param, in_modifier, op_modifier, context->fw_status); + + switch(context->fw_status) { + case CMD_STAT_BAD_PARAM: + mlx4_err(dev, "Parameter is not supported, " + "parameter is out of range\n"); + break; + case CMD_STAT_EXCEED_LIM: + mlx4_err(dev, "Required capability exceeded " + "device limits\n"); + break; + default: + break; + } goto out; } if (out_is_imm) *out_param = context->out_param; out: 
spin_lock(&cmd->context_lock); context->next = cmd->free_head; cmd->free_head = context - cmd->context; spin_unlock(&cmd->context_lock); up(&cmd->event_sem); return err; } int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout, int native) { if (pci_channel_offline(dev->pdev)) return -EIO; if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { if (mlx4_priv(dev)->cmd.use_events) return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm, in_modifier, op_modifier, op, timeout); else return mlx4_cmd_poll(dev, in_param, out_param, out_is_imm, in_modifier, op_modifier, op, timeout); } return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm, in_modifier, op_modifier, op, timeout); } EXPORT_SYMBOL_GPL(__mlx4_cmd); static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr, int slave, u64 slave_addr, int size, int is_read) { u64 in_param; u64 out_param; if ((slave_addr & 0xfff) | (master_addr & 0xfff) | (slave & ~0x7f) | (size & 0xff)) { mlx4_err(dev, "Bad access mem params - slave_addr:0x%llx " "master_addr:0x%llx slave_id:%d size:%d\n", (unsigned long long) slave_addr, (unsigned long long) master_addr, slave, size); return -EINVAL; } if (is_read) { in_param = (u64) slave | slave_addr; out_param = (u64) dev->caps.function | master_addr; } else { in_param = (u64) dev->caps.function | master_addr; out_param = (u64) slave | slave_addr; } return mlx4_cmd_imm(dev, in_param, &out_param, size, 0, MLX4_CMD_ACCESS_MEM, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox) { struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf); struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf); int 
err; int i; if (index & 0x1f) return -EINVAL; in_mad->attr_mod = cpu_to_be32(index / 32); err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3, MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (err) return err; for (i = 0; i < 32; ++i) pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]); return err; } static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox) { int i; int err; for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) { err = query_pkey_block(dev, port, i, table + i, inbox, outbox); if (err) return err; } return 0; } #define PORT_CAPABILITY_LOCATION_IN_SMP 20 #define PORT_STATE_OFFSET 32 static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf) { if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP) return IB_PORT_ACTIVE; else return IB_PORT_DOWN; } static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { struct ib_smp *smp = inbox->buf; u32 index; u8 port; u16 *table; int err; int vidx, pidx; struct mlx4_priv *priv = mlx4_priv(dev); struct ib_smp *outsmp = outbox->buf; __be16 *outtab = (__be16 *)(outsmp->data); __be32 slave_cap_mask; __be64 slave_node_guid; port = vhcr->in_modifier; if (smp->base_version == 1 && smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && smp->class_version == 1) { if (smp->method == IB_MGMT_METHOD_GET) { if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) { index = be32_to_cpu(smp->attr_mod); if (port < 1 || port > dev->caps.num_ports) return -EINVAL; table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL); if (!table) return -ENOMEM; /* need to get the full pkey table because the paravirtualized * pkeys may be scattered among several pkey blocks. 
*/ err = get_full_pkey_table(dev, port, table, inbox, outbox); if (!err) { for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) { pidx = priv->virt2phys_pkey[slave][port - 1][vidx]; outtab[vidx % 32] = cpu_to_be16(table[pidx]); } } kfree(table); return err; } if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) { /*get the slave specific caps:*/ /*do the command */ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier, vhcr->op_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); /* modify the response for slaves */ if (!err && slave != mlx4_master_func_num(dev)) { u8 *state = outsmp->data + PORT_STATE_OFFSET; *state = (*state & 0xf0) | vf_port_state(dev, port, slave); slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4); } return err; } if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) { /* compute slave's gid block */ smp->attr_mod = cpu_to_be32(slave / 8); /* execute cmd */ err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier, vhcr->op_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (!err) { /* if needed, move slave gid to index 0 */ if (slave % 8) memcpy(outsmp->data, outsmp->data + (slave % 8) * 8, 8); /* delete all other gids */ memset(outsmp->data + 8, 0, 56); } return err; } if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier, vhcr->op_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (!err) { slave_node_guid = mlx4_get_slave_node_guid(dev, slave); memcpy(outsmp->data + 12, &slave_node_guid, 8); } return err; } } } if (slave != mlx4_master_func_num(dev) && ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) || (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && smp->method == IB_MGMT_METHOD_SET))) { mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, " "class 0x%x, method 0x%x for attr 0x%x. 
Rejecting\n", slave, smp->method, smp->mgmt_class, be16_to_cpu(smp->attr_id)); return -EPERM; } /*default:*/ return mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier, vhcr->op_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } static int MLX4_CMD_DIAG_RPRT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { return -EPERM; } static int MLX4_CMD_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { return -EPERM; } int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { u64 in_param; u64 out_param; int err; in_param = cmd->has_inbox ? (u64) inbox->dma : vhcr->in_param; out_param = cmd->has_outbox ? (u64) outbox->dma : vhcr->out_param; if (cmd->encode_slave_id) { in_param &= 0xffffffffffffff00ll; in_param |= slave; } err = __mlx4_cmd(dev, in_param, &out_param, cmd->out_is_imm, vhcr->in_modifier, vhcr->op_modifier, vhcr->op, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (cmd->out_is_imm) vhcr->out_param = out_param; return err; } static struct mlx4_cmd_info cmd_info[] = { { .opcode = MLX4_CMD_QUERY_FW, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_FW_wrapper }, { .opcode = MLX4_CMD_QUERY_HCA, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_QUERY_DEV_CAP, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_DEV_CAP_wrapper }, { .opcode = MLX4_CMD_QUERY_FUNC_CAP, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = 
mlx4_QUERY_FUNC_CAP_wrapper }, { .opcode = MLX4_CMD_QUERY_ADAPTER, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_INIT_PORT, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_INIT_PORT_wrapper }, { .opcode = MLX4_CMD_CLOSE_PORT, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_CLOSE_PORT_wrapper }, { .opcode = MLX4_CMD_QUERY_PORT, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_PORT_wrapper }, { .opcode = MLX4_CMD_SET_PORT, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SET_PORT_wrapper }, { .opcode = MLX4_CMD_MAP_EQ, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_MAP_EQ_wrapper }, { .opcode = MLX4_CMD_SW2HW_EQ, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_SW2HW_EQ_wrapper }, { .opcode = MLX4_CMD_HW_HEALTH_CHECK, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_DIAG_RPRT, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .skip_err_print = true, .verify = NULL, .wrapper = MLX4_CMD_DIAG_RPRT_wrapper }, { .opcode = MLX4_CMD_NOP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_ALLOC_RES, .has_inbox = false, .has_outbox = false, .out_is_imm = true, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_ALLOC_RES_wrapper }, { .opcode = MLX4_CMD_FREE_RES, .has_inbox = false, .has_outbox = false, .out_is_imm = false, 
.encode_slave_id = false, .verify = NULL, .wrapper = mlx4_FREE_RES_wrapper }, { .opcode = MLX4_CMD_SW2HW_MPT, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_SW2HW_MPT_wrapper }, { .opcode = MLX4_CMD_QUERY_MPT, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_MPT_wrapper }, { .opcode = MLX4_CMD_HW2SW_MPT, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_HW2SW_MPT_wrapper }, { .opcode = MLX4_CMD_READ_MTT, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_WRITE_MTT, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_WRITE_MTT_wrapper }, { .opcode = MLX4_CMD_SYNC_TPT, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, { .opcode = MLX4_CMD_HW2SW_EQ, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_HW2SW_EQ_wrapper }, { .opcode = MLX4_CMD_QUERY_EQ, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_QUERY_EQ_wrapper }, { .opcode = MLX4_CMD_SW2HW_CQ, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_SW2HW_CQ_wrapper }, { .opcode = MLX4_CMD_HW2SW_CQ, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_HW2SW_CQ_wrapper }, { .opcode = MLX4_CMD_QUERY_CQ, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_CQ_wrapper }, { .opcode = MLX4_CMD_MODIFY_CQ, .has_inbox = true, .has_outbox = false, 
.out_is_imm = true, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_MODIFY_CQ_wrapper }, { .opcode = MLX4_CMD_SW2HW_SRQ, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_SW2HW_SRQ_wrapper }, { .opcode = MLX4_CMD_HW2SW_SRQ, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_HW2SW_SRQ_wrapper }, { .opcode = MLX4_CMD_QUERY_SRQ, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_SRQ_wrapper }, { .opcode = MLX4_CMD_ARM_SRQ, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_ARM_SRQ_wrapper }, { .opcode = MLX4_CMD_RST2INIT_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = true, .verify = NULL, .wrapper = mlx4_RST2INIT_QP_wrapper }, { .opcode = MLX4_CMD_INIT2INIT_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_INIT2INIT_QP_wrapper }, { .opcode = MLX4_CMD_INIT2RTR_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_INIT2RTR_QP_wrapper }, { .opcode = MLX4_CMD_RTR2RTS_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_RTR2RTS_QP_wrapper }, { .opcode = MLX4_CMD_RTS2RTS_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_RTS2RTS_QP_wrapper }, { .opcode = MLX4_CMD_SQERR2RTS_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SQERR2RTS_QP_wrapper }, { .opcode = MLX4_CMD_2ERR_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = 
mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_RTS2SQD_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_SQD2SQD_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SQD2SQD_QP_wrapper }, { .opcode = MLX4_CMD_SQD2RTS_QP, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SQD2RTS_QP_wrapper }, { .opcode = MLX4_CMD_2RST_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_2RST_QP_wrapper }, { .opcode = MLX4_CMD_QUERY_QP, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_SUSPEND_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_UNSUSPEND_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, { .opcode = MLX4_CMD_UPDATE_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .skip_err_print = true, .verify = NULL, .wrapper = MLX4_CMD_UPDATE_QP_wrapper }, { .opcode = MLX4_CMD_CONF_SPECIAL_QP, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, /* XXX verify: only demux can do this */ .wrapper = NULL }, { .opcode = MLX4_CMD_MAD_IFC, .has_inbox = true, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_MAD_IFC_wrapper }, { .opcode = MLX4_CMD_QUERY_IF_STAT, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QUERY_IF_STAT_wrapper }, /* Native multicast 
commands are not available for guests */ { .opcode = MLX4_CMD_QP_ATTACH, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QP_ATTACH_wrapper }, { .opcode = MLX4_CMD_PROMISC, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_PROMISC_wrapper }, /* Ethernet specific commands */ { .opcode = MLX4_CMD_SET_VLAN_FLTR, .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SET_VLAN_FLTR_wrapper }, { .opcode = MLX4_CMD_SET_MCAST_FLTR, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_SET_MCAST_FLTR_wrapper }, { .opcode = MLX4_CMD_DUMP_ETH_STATS, .has_inbox = false, .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_DUMP_ETH_STATS_wrapper }, { .opcode = MLX4_CMD_INFORM_FLR_DONE, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = NULL }, /* flow steering commands */ { .opcode = MLX4_QP_FLOW_STEERING_ATTACH, .has_inbox = true, .has_outbox = false, .out_is_imm = true, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QP_FLOW_STEERING_ATTACH_wrapper }, { .opcode = MLX4_QP_FLOW_STEERING_DETACH, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper }, /* wol commands */ { .opcode = MLX4_CMD_MOD_STAT_CFG, .has_inbox = false, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .skip_err_print = true, .verify = NULL, .wrapper = mlx4_MOD_STAT_CFG_wrapper }, };

/*
 * Execute one command that a slave posted through its virtual HCR.
 *
 * @dev:     device the command is executed on
 * @slave:   index of the slave that issued the command
 * @in_vhcr: vHCR to process directly, or NULL to DMA the vHCR in from the
 *           slave (address taken from slave_state[slave].vhcr_dma) and to
 *           DMA the result back when done
 *
 * The opcode is looked up in cmd_info[]; the matching entry decides whether
 * an inbox is copied in, whether an outbox is allocated and copied back,
 * and whether a wrapper or the native __mlx4_cmd() runs the command.
 *
 * Command-level failures are reported to the slave through
 * vhcr_cmd->status.  The return value is 0, -ENOMEM when the software
 * vHCR cannot be allocated, or the error of a failed mlx4_ACCESS_MEM()
 * transfer of the vHCR / outbox.
 */
static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
				    struct mlx4_vhcr_cmd *in_vhcr)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_cmd_info *cmd = NULL;
	struct mlx4_vhcr_cmd *vhcr_cmd = in_vhcr ? in_vhcr : priv->mfunc.vhcr;
	struct mlx4_vhcr *vhcr;
	struct mlx4_cmd_mailbox *inbox = NULL;
	struct mlx4_cmd_mailbox *outbox = NULL;
	u64 in_param;
	u64 out_param;
	int ret = 0;
	int i;
	int err = 0;

	/* Create sw representation of Virtual HCR */
	vhcr = kzalloc(sizeof(struct mlx4_vhcr), GFP_KERNEL);
	if (!vhcr)
		return -ENOMEM;

	/* DMA in the vHCR */
	if (!in_vhcr) {
		ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave,
				      priv->mfunc.master.slave_state[slave].vhcr_dma,
				      ALIGN(sizeof(struct mlx4_vhcr_cmd),
					    MLX4_ACCESS_MEM_ALIGN), 1);
		if (ret) {
			/* the two literals used to run together as "vhcrret:" */
			mlx4_err(dev, "%s:Failed reading vhcr "
				 "ret: 0x%x\n", __func__, ret);
			kfree(vhcr);
			return ret;
		}
	}

	/* Fill SW VHCR fields */
	vhcr->in_param = be64_to_cpu(vhcr_cmd->in_param);
	vhcr->out_param = be64_to_cpu(vhcr_cmd->out_param);
	vhcr->in_modifier = be32_to_cpu(vhcr_cmd->in_modifier);
	vhcr->token = be16_to_cpu(vhcr_cmd->token);
	/* low 12 bits of the opcode word are the opcode, top 4 the modifier */
	vhcr->op = be16_to_cpu(vhcr_cmd->opcode) & 0xfff;
	vhcr->op_modifier = (u8) (be16_to_cpu(vhcr_cmd->opcode) >> 12);
	vhcr->e_bit = vhcr_cmd->flags & (1 << 6);

	/* Lookup command */
	for (i = 0; i < ARRAY_SIZE(cmd_info); ++i) {
		if (vhcr->op == cmd_info[i].opcode) {
			cmd = &cmd_info[i];
			break;
		}
	}
	if (!cmd) {
		mlx4_err(dev, "unparavirt command: %s (0x%x) accepted from slave:%d\n",
			 cmd_to_str(vhcr->op), vhcr->op, slave);
		vhcr_cmd->status = CMD_STAT_BAD_PARAM;
		goto out_status;
	}

	/* Read inbox */
	if (cmd->has_inbox) {
		vhcr->in_param &= INBOX_MASK;
		inbox = mlx4_alloc_cmd_mailbox(dev);
		if (IS_ERR(inbox)) {
			vhcr_cmd->status = CMD_STAT_BAD_SIZE;
			/* keep the cleanup at "out" from freeing an ERR_PTR */
			inbox = NULL;
			goto out_status;
		}

		if (mlx4_ACCESS_MEM(dev, inbox->dma, slave,
				    vhcr->in_param,
				    MLX4_MAILBOX_SIZE, 1)) {
			mlx4_err(dev, "%s: Failed reading inbox for cmd %s (0x%x)\n",
				 __func__, cmd_to_str(cmd->opcode), cmd->opcode);
			vhcr_cmd->status = CMD_STAT_INTERNAL_ERR;
			goto out_status;
		}
	}

	/* Apply permission and bound checks if applicable */
	if (cmd->verify && cmd->verify(dev, slave, vhcr, inbox)) {
		mlx4_warn(dev, "Command %s (0x%x) from slave: %d failed protection "
			  "checks for resource_id: %d\n", cmd_to_str(vhcr->op),
			  vhcr->op, slave, vhcr->in_modifier);
		vhcr_cmd->status = CMD_STAT_BAD_OP;
		goto out_status;
	}

	/* Allocate outbox */
	if (cmd->has_outbox) {
		outbox = mlx4_alloc_cmd_mailbox(dev);
		if (IS_ERR(outbox)) {
			vhcr_cmd->status = CMD_STAT_BAD_SIZE;
			outbox = NULL;
			goto out_status;
		}
	}

	/* Execute the command! */
	if (cmd->wrapper) {
		err = cmd->wrapper(dev, slave, vhcr, inbox, outbox, cmd);
		if (cmd->out_is_imm)
			vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param);
	} else {
		in_param = cmd->has_inbox ? (u64) inbox->dma : vhcr->in_param;
		out_param = cmd->has_outbox ? (u64) outbox->dma :
					      vhcr->out_param;
		err = __mlx4_cmd(dev, in_param, &out_param,
				 cmd->out_is_imm, vhcr->in_modifier,
				 vhcr->op_modifier, vhcr->op,
				 MLX4_CMD_TIME_CLASS_A,
				 MLX4_CMD_NATIVE);

		if (cmd->out_is_imm) {
			vhcr->out_param = out_param;
			vhcr_cmd->out_param = cpu_to_be64(vhcr->out_param);
		}
	}

	if (err) {
		if (!cmd->skip_err_print)
			mlx4_warn(dev, "vhcr command %s (0x%x) slave:%d "
				  "in_param 0x%llx in_mod=0x%x, op_mod=0x%x "
				  "failed with error:%d, status %d\n",
				  cmd_to_str(vhcr->op), vhcr->op, slave,
				  (unsigned long long) vhcr->in_param,
				  vhcr->in_modifier, vhcr->op_modifier,
				  vhcr->errno, err);
		vhcr_cmd->status = mlx4_errno_to_status(err);
		goto out_status;
	}

	/* Write outbox if command completed successfully */
	if (cmd->has_outbox && !vhcr_cmd->status) {
		ret = mlx4_ACCESS_MEM(dev, outbox->dma, slave, vhcr->out_param,
				      MLX4_MAILBOX_SIZE, MLX4_CMD_WRAPPED);
		if (ret) {
			/*
			 * If we failed to write back the outbox after the
			 * command was successfully executed, we must fail this
			 * slave, as it is now in undefined state
			 */
			mlx4_err(dev, "%s: Failed writing outbox\n", __func__);
			goto out;
		}
	}

out_status:
	/* DMA back vhcr result */
	if (!in_vhcr) {
		ret = mlx4_ACCESS_MEM(dev, priv->mfunc.vhcr_dma, slave,
				      priv->mfunc.master.slave_state[slave].vhcr_dma,
				      ALIGN(sizeof(struct mlx4_vhcr),
					    MLX4_ACCESS_MEM_ALIGN),
				      MLX4_CMD_WRAPPED);
		if (ret)
			mlx4_err(dev, "%s:Failed writing vhcr result\n",
				 __func__);
		else if (vhcr->e_bit &&
			 mlx4_GEN_EQE(dev, slave, &priv->mfunc.master.cmd_eqe))
			mlx4_warn(dev, "Failed to generate command completion "
				  "eqe for slave %d\n", slave);
	}

out:
	kfree(vhcr);
	mlx4_free_cmd_mailbox(dev, inbox);
	mlx4_free_cmd_mailbox(dev, outbox);
	return ret;
}

/*
 * Apply a changed admin default vlan / qos of (slave, port) to the
 * operational state right away and queue a work item on the master
 * comm_wq (handler: mlx4_vf_immed_vlan_work_handler) to process it.
 *
 * Returns 0 when nothing changed or the work was queued, -ENOMEM when the
 * work item cannot be allocated, or the error from __mlx4_register_vlan().
 */
static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv,
						   int slave, int port)
{
	struct mlx4_vport_oper_state *vp_oper;
	struct mlx4_vport_state *vp_admin;
	struct mlx4_vf_immed_vlan_work *work;
	int err;
	int admin_vlan_ix = NO_INDX;

	vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
	vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];

	/* operational state already matches the admin request */
	if (vp_oper->state.default_vlan == vp_admin->default_vlan &&
	    vp_oper->state.default_qos == vp_admin->default_qos)
		return 0;

	work = kzalloc(sizeof(*work), GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	if (vp_oper->state.default_vlan != vp_admin->default_vlan) {
		if (MLX4_VGT != vp_admin->default_vlan) {
			err = __mlx4_register_vlan(&priv->dev, port,
						   vp_admin->default_vlan,
						   &admin_vlan_ix);
			if (err) {
				/* work was never queued: release it here */
				kfree(work);
				mlx4_warn((&priv->dev),
					  "No vlan resources slave %d, port %d\n",
					  slave, port);
				return err;
			}
		} else {
			admin_vlan_ix = NO_INDX;
		}
		work->flags |= MLX4_VF_IMMED_VLAN_FLAG_VLAN;
		mlx4_dbg((&(priv->dev)),
			 "alloc vlan %d idx %d slave %d port %d\n",
			 (int)(vp_admin->default_vlan),
			 admin_vlan_ix, slave, port);
	}

	/* save original vlan ix and vlan id */
	work->orig_vlan_id = vp_oper->state.default_vlan;
	work->orig_vlan_ix = vp_oper->vlan_idx;

	/* handle new qos */
	if (vp_oper->state.default_qos != vp_admin->default_qos)
		work->flags |= MLX4_VF_IMMED_VLAN_FLAG_QOS;

	if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN)
		vp_oper->vlan_idx = admin_vlan_ix;

	vp_oper->state.default_vlan = vp_admin->default_vlan;
	vp_oper->state.default_qos = vp_admin->default_qos;

	/* iterate over QPs owned by this slave, using UPDATE_QP */
	work->port = port;
	work->slave = slave;
	work->qos = vp_oper->state.default_qos;
	work->vlan_id = vp_oper->state.default_vlan;
	work->vlan_ix = vp_oper->vlan_idx;
	work->priv =
priv; INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler); queue_work(priv->mfunc.master.comm_wq, &work->work); return 0; } static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) { int port, err; struct mlx4_vport_state *vp_admin; struct mlx4_vport_oper_state *vp_oper; for (port = 1; port <= MLX4_MAX_PORTS; port++) { vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; vp_oper->state = *vp_admin; if (MLX4_VGT != vp_admin->default_vlan) { err = __mlx4_register_vlan(&priv->dev, port, vp_admin->default_vlan, &(vp_oper->vlan_idx)); if (err) { vp_oper->vlan_idx = NO_INDX; mlx4_warn((&priv->dev), "No vlan resorces slave %d, port %d\n", slave, port); return err; } mlx4_dbg((&(priv->dev)), "alloc vlan %d idx %d slave %d port %d\n", (int)(vp_oper->state.default_vlan), vp_oper->vlan_idx, slave, port); } if (vp_admin->spoofchk) { vp_oper->mac_idx = __mlx4_register_mac(&priv->dev, port, vp_admin->mac); if (0 > vp_oper->mac_idx) { err = vp_oper->mac_idx; vp_oper->mac_idx = NO_INDX; mlx4_warn((&priv->dev), "No mac resources slave %d, port %d\n", slave, port); return err; } mlx4_dbg((&(priv->dev)), "alloc mac %llx idx %d slave %d port %d\n", (unsigned long long) vp_oper->state.mac, vp_oper->mac_idx, slave, port); } } return 0; } static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave) { int port; struct mlx4_vport_oper_state *vp_oper; for (port = 1; port <= MLX4_MAX_PORTS; port++) { vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (NO_INDX != vp_oper->vlan_idx) { __mlx4_unregister_vlan(&priv->dev, port, vp_oper->state.default_vlan); vp_oper->vlan_idx = NO_INDX; } if (NO_INDX != vp_oper->mac_idx) { __mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac); vp_oper->mac_idx = NO_INDX; } } return; } static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, u16 param, u8 toggle) { struct mlx4_priv *priv = mlx4_priv(dev); struct 
mlx4_slave_state *slave_state = priv->mfunc.master.slave_state; u32 reply; u8 is_going_down = 0; int i; unsigned long flags; slave_state[slave].comm_toggle ^= 1; reply = (u32) slave_state[slave].comm_toggle << 31; if (toggle != slave_state[slave].comm_toggle) { mlx4_warn(dev, "Incorrect toggle %d from slave %d. *** MASTER" "STATE COMPROMISIED ***\n", toggle, slave); goto reset_slave; } if (cmd == MLX4_COMM_CMD_RESET) { mlx4_warn(dev, "Received reset from slave:%d\n", slave); slave_state[slave].active = false; slave_state[slave].old_vlan_api = false; mlx4_master_deactivate_admin_state(priv, slave); for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) { slave_state[slave].event_eq[i].eqn = -1; slave_state[slave].event_eq[i].token = 0; } /*check if we are in the middle of FLR process, if so return "retry" status to the slave*/ if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) goto inform_slave_state; mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave); /* write the version in the event field */ reply |= mlx4_comm_get_version(); goto reset_slave; } /*command from slave in the middle of FLR*/ if (cmd != MLX4_COMM_CMD_RESET && MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) { mlx4_warn(dev, "slave:%d is Trying to run cmd (0x%x) " "in the middle of FLR\n", slave, cmd); return; } switch (cmd) { case MLX4_COMM_CMD_VHCR0: if (slave_state[slave].last_cmd != MLX4_COMM_CMD_RESET) goto reset_slave; slave_state[slave].vhcr_dma = ((u64) param) << 48; priv->mfunc.master.slave_state[slave].cookie = 0; break; case MLX4_COMM_CMD_VHCR1: if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR0) goto reset_slave; slave_state[slave].vhcr_dma |= ((u64) param) << 32; break; case MLX4_COMM_CMD_VHCR2: if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR1) goto reset_slave; slave_state[slave].vhcr_dma |= ((u64) param) << 16; break; case MLX4_COMM_CMD_VHCR_EN: if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR2) goto reset_slave; slave_state[slave].vhcr_dma |= param; if 
(mlx4_master_activate_admin_state(priv, slave)) goto reset_slave; slave_state[slave].active = true; mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave); break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) goto reset_slave; mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_master_process_vhcr(dev, slave, NULL)) { mlx4_err(dev, "Failed processing vhcr for slave: %d," " resetting slave.\n", slave); mutex_unlock(&priv->cmd.slave_cmd_mutex); goto reset_slave; } mutex_unlock(&priv->cmd.slave_cmd_mutex); break; default: mlx4_warn(dev, "Bad comm cmd: %d from slave: %d\n", cmd, slave); goto reset_slave; } spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = cmd; else is_going_down = 1; spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); if (is_going_down) { mlx4_warn(dev, "Slave is going down aborting command (%d)" " executing from slave: %d\n", cmd, slave); return; } __raw_writel((__force u32) cpu_to_be32(reply), &priv->mfunc.comm[slave].slave_read); mmiowb(); return; reset_slave: /* cleanup any slave resources */ mlx4_delete_all_resources_for_slave(dev, slave); spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags); /*with slave in the middle of flr, no need to clean resources again.*/ inform_slave_state: __raw_writel((__force u32) cpu_to_be32(reply), &priv->mfunc.comm[slave].slave_read); wmb(); } /* master command processing */ void mlx4_master_comm_channel(struct work_struct *work) { struct mlx4_mfunc_master_ctx *master = container_of(work, struct mlx4_mfunc_master_ctx, comm_work); struct mlx4_mfunc *mfunc = container_of(master, struct mlx4_mfunc, master); struct mlx4_priv *priv = 
container_of(mfunc, struct mlx4_priv, mfunc); struct mlx4_dev *dev = &priv->dev; __be32 *bit_vec; u32 comm_cmd; u32 vec; int i, j, slave; int toggle; int served = 0; int reported = 0; u32 slt; bit_vec = master->comm_arm_bit_vector; for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) { vec = be32_to_cpu(bit_vec[i]); for (j = 0; j < 32; j++) { if (!(vec & (1 << j))) continue; ++reported; slave = (i * 32) + j; comm_cmd = swab32(readl( &mfunc->comm[slave].slave_write)); slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31; toggle = comm_cmd >> 31; if (toggle != slt) { if (master->slave_state[slave].comm_toggle != slt) { mlx4_info(dev, "slave %d out of sync." " read toggle %d, state toggle %d. " "Resynching.\n", slave, slt, master->slave_state[slave].comm_toggle); master->slave_state[slave].comm_toggle = slt; } mlx4_master_do_cmd(dev, slave, comm_cmd >> 16 & 0xff, comm_cmd & 0xffff, toggle); ++served; } else mlx4_err(dev, "slave %d out of sync." " read toggle %d, write toggle %d.\n", slave, slt, toggle); } } if (reported && reported != served) mlx4_warn(dev, "Got command event with bitmask from %d slaves" " but %d were served\n", reported, served); } /* master command processing */ void mlx4_master_arm_comm_channel(struct work_struct *work) { struct mlx4_mfunc_master_ctx *master = container_of(work, struct mlx4_mfunc_master_ctx, arm_comm_work); struct mlx4_mfunc *mfunc = container_of(master, struct mlx4_mfunc, master); struct mlx4_priv *priv = container_of(mfunc, struct mlx4_priv, mfunc); struct mlx4_dev *dev = &priv->dev; if (mlx4_ARM_COMM_CHANNEL(dev)) mlx4_warn(dev, "Failed to arm comm channel events\n"); } static int sync_toggles(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int wr_toggle; int rd_toggle; unsigned long end; wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31; end = jiffies + msecs_to_jiffies(5000); while (time_before(jiffies, end)) { rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31; if (rd_toggle == 
wr_toggle) { priv->cmd.comm_toggle = rd_toggle; return 0; } cond_resched(); } /* * we could reach here if for example the previous VM using this * function misbehaved and left the channel with unsynced state. We * should fix this here and give this VM a chance to use a properly * synced channel */ mlx4_warn(dev, "recovering from previously mis-behaved VM\n"); __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_read); __raw_writel((__force u32) 0, &priv->mfunc.comm->slave_write); priv->cmd.comm_toggle = 0; return 0; } int mlx4_multi_func_init(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state; int i, j, err, port; if (mlx4_is_master(dev)) priv->mfunc.comm = ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) + priv->fw.comm_base, MLX4_COMM_PAGESIZE); else priv->mfunc.comm = ioremap(pci_resource_start(dev->pdev, 2) + MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE); if (!priv->mfunc.comm) { mlx4_err(dev, "Couldn't map communication vector.\n"); goto err_vhcr; } if (mlx4_is_master(dev)) { priv->mfunc.master.slave_state = kzalloc(dev->num_slaves * sizeof(struct mlx4_slave_state), GFP_KERNEL); if (!priv->mfunc.master.slave_state) goto err_comm; priv->mfunc.master.vf_admin = kzalloc(dev->num_slaves * sizeof(struct mlx4_vf_admin_state), GFP_KERNEL); if (!priv->mfunc.master.vf_admin) goto err_comm_admin; priv->mfunc.master.vf_oper = kzalloc(dev->num_slaves * sizeof(struct mlx4_vf_oper_state), GFP_KERNEL); if (!priv->mfunc.master.vf_oper) goto err_comm_oper; for (i = 0; i < dev->num_slaves; ++i) { s_state = &priv->mfunc.master.slave_state[i]; s_state->last_cmd = MLX4_COMM_CMD_RESET; mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]); for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j) s_state->event_eq[j].eqn = -1; __raw_writel((__force u32) 0, &priv->mfunc.comm[i].slave_write); __raw_writel((__force u32) 0, &priv->mfunc.comm[i].slave_read); mmiowb(); for (port = 1; port <= MLX4_MAX_PORTS; port++) { s_state->vlan_filter[port] = 
kzalloc(sizeof(struct mlx4_vlan_fltr), GFP_KERNEL); if (!s_state->vlan_filter[port]) { if (--port) kfree(s_state->vlan_filter[port]); goto err_slaves; } INIT_LIST_HEAD(&s_state->mcast_filters[port]); priv->mfunc.master.vf_admin[i].vport[port].default_vlan = MLX4_VGT; priv->mfunc.master.vf_oper[i].vport[port].state.default_vlan = MLX4_VGT; priv->mfunc.master.vf_oper[i].vport[port].vlan_idx = NO_INDX; priv->mfunc.master.vf_oper[i].vport[port].mac_idx = NO_INDX; } spin_lock_init(&s_state->lock); } memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size); priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD; INIT_WORK(&priv->mfunc.master.comm_work, mlx4_master_comm_channel); INIT_WORK(&priv->mfunc.master.arm_comm_work, mlx4_master_arm_comm_channel); INIT_WORK(&priv->mfunc.master.slave_event_work, mlx4_gen_slave_eqe); INIT_WORK(&priv->mfunc.master.slave_flr_event_work, mlx4_master_handle_slave_flr); spin_lock_init(&priv->mfunc.master.slave_state_lock); spin_lock_init(&priv->mfunc.master.slave_eq.event_lock); priv->mfunc.master.comm_wq = create_singlethread_workqueue("mlx4_comm"); if (!priv->mfunc.master.comm_wq) goto err_slaves; if (mlx4_init_resource_tracker(dev)) goto err_thread; err = mlx4_ARM_COMM_CHANNEL(dev); if (err) { mlx4_err(dev, " Failed to arm comm channel eq: %x\n", err); goto err_resource; } } else { err = sync_toggles(dev); if (err) { mlx4_err(dev, "Couldn't sync toggles\n"); goto err_comm; } } return 0; err_resource: mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL); err_thread: flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); err_slaves: while (--i) { for (port = 1; port <= MLX4_MAX_PORTS; port++) kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]); } kfree(priv->mfunc.master.vf_oper); err_comm_oper: kfree(priv->mfunc.master.vf_admin); err_comm_admin: kfree(priv->mfunc.master.slave_state); err_comm: iounmap(priv->mfunc.comm); err_vhcr: dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, priv->mfunc.vhcr, 
priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; return -ENOMEM; } int mlx4_cmd_init(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); mutex_init(&priv->cmd.hcr_mutex); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; priv->cmd.toggle = 1; priv->cmd.hcr = NULL; priv->mfunc.vhcr = NULL; if (!mlx4_is_slave(dev)) { priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE); if (!priv->cmd.hcr) { mlx4_err(dev, "Couldn't map command register.\n"); return -ENOMEM; } } if (mlx4_is_mfunc(dev)) { priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, &priv->mfunc.vhcr_dma, GFP_KERNEL); if (!priv->mfunc.vhcr) { mlx4_err(dev, "Couldn't allocate VHCR.\n"); goto err_hcr; } } priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, MLX4_MAILBOX_SIZE, MLX4_MAILBOX_SIZE, 0); if (!priv->cmd.pool) goto err_vhcr; return 0; err_vhcr: if (mlx4_is_mfunc(dev)) dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; err_hcr: if (!mlx4_is_slave(dev)) iounmap(priv->cmd.hcr); return -ENOMEM; } void mlx4_multi_func_cleanup(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i, port; if (mlx4_is_master(dev)) { flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); for (i = 0; i < dev->num_slaves; i++) { for (port = 1; port <= MLX4_MAX_PORTS; port++) kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]); } kfree(priv->mfunc.master.slave_state); kfree(priv->mfunc.master.vf_admin); kfree(priv->mfunc.master.vf_oper); } iounmap(priv->mfunc.comm); } void mlx4_cmd_cleanup(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); pci_pool_destroy(priv->cmd.pool); if (!mlx4_is_slave(dev)) iounmap(priv->cmd.hcr); if (mlx4_is_mfunc(dev)) dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; } /* * Switch to using 
events to issue FW commands (can only be called * after event queue for command events has been initialized). */ int mlx4_cmd_use_events(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i; int err = 0; priv->cmd.context = kmalloc(priv->cmd.max_cmds * sizeof (struct mlx4_cmd_context), GFP_KERNEL); if (!priv->cmd.context) return -ENOMEM; for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; } priv->cmd.context[priv->cmd.max_cmds - 1].next = -1; priv->cmd.free_head = 0; sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds); spin_lock_init(&priv->cmd.context_lock); for (priv->cmd.token_mask = 1; priv->cmd.token_mask < priv->cmd.max_cmds; priv->cmd.token_mask <<= 1) ; /* nothing */ --priv->cmd.token_mask; down(&priv->cmd.poll_sem); priv->cmd.use_events = 1; return err; } /* * Switch back to polling (used when shutting down the device) */ void mlx4_cmd_use_polling(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i; priv->cmd.use_events = 0; for (i = 0; i < priv->cmd.max_cmds; ++i) down(&priv->cmd.event_sem); kfree(priv->cmd.context); up(&priv->cmd.poll_sem); } struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) { struct mlx4_cmd_mailbox *mailbox; mailbox = kmalloc(sizeof *mailbox, GFP_KERNEL); if (!mailbox) return ERR_PTR(-ENOMEM); mailbox->buf = pci_pool_alloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL, &mailbox->dma); if (!mailbox->buf) { kfree(mailbox); return ERR_PTR(-ENOMEM); } memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE); return mailbox; } EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox); void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox) { if (!mailbox) return; pci_pool_free(mlx4_priv(dev)->cmd.pool, mailbox->buf, mailbox->dma); kfree(mailbox); } EXPORT_SYMBOL_GPL(mlx4_free_cmd_mailbox); u32 mlx4_comm_get_version(void) { return ((u32) CMD_CHAN_IF_REV << 8) | (u32) CMD_CHAN_VER; } static int mlx4_get_slave_indx(struct mlx4_dev *dev, int 
vf) { if ((vf < 0) || (vf >= dev->num_vfs)) { mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", vf, dev->num_vfs); return -EINVAL; } return (vf+1); } int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; int slave; if (!mlx4_is_master(dev)) return -EPROTONOSUPPORT; slave = mlx4_get_slave_indx(dev, vf); if (slave < 0) return -EINVAL; s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; s_info->mac = mlx4_mac_to_u64(mac); mlx4_info(dev, "default mac on vf %d port %d to %llX will take afect only after vf restart\n", vf, port, (unsigned long long) s_info->mac); return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_oper_state *vf_oper; struct mlx4_vport_state *vf_admin; int slave; if ((!mlx4_is_master(dev)) || !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VLAN_CONTROL)) return -EPROTONOSUPPORT; if ((vlan > 4095) || (qos > 7)) return -EINVAL; slave = mlx4_get_slave_indx(dev, vf); if (slave < 0) return -EINVAL; vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if ((0 == vlan) && (0 == qos)) vf_admin->default_vlan = MLX4_VGT; else vf_admin->default_vlan = vlan; vf_admin->default_qos = qos; if (priv->mfunc.master.slave_state[slave].active && dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) { mlx4_info(dev, "updating vf %d port %d config params immediately\n", vf, port); mlx4_master_immediate_activate_vlan_qos(priv, slave, port); } return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan); /* mlx4_get_slave_default_vlan - * retrun true if VST ( default vlan) * if VST will fill vlan & qos (if not NULL) */ bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, u16 *vlan, u8 *qos) { struct mlx4_vport_oper_state *vp_oper; struct mlx4_priv *priv; priv = 
mlx4_priv(dev); vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (MLX4_VGT != vp_oper->state.default_vlan) { if (vlan) *vlan = vp_oper->state.default_vlan; if (qos) *qos = vp_oper->state.default_qos; return true; } return false; } EXPORT_SYMBOL_GPL(mlx4_get_slave_default_vlan); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; int slave; if ((!mlx4_is_master(dev)) || !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FSM)) return -EPROTONOSUPPORT; slave = mlx4_get_slave_indx(dev, vf); if (slave < 0) return -EINVAL; s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; s_info->spoofchk = setting; return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_spoofchk); int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; struct mlx4_vport_oper_state *vp_oper; int slave; u8 link_stat_event; slave = mlx4_get_slave_indx(dev, vf); if (slave < 0) return -EINVAL; switch (link_state) { case IFLA_VF_LINK_STATE_AUTO: /* get link curent state */ if (!priv->sense.do_sense_port[port]) link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE; else link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN; break; case IFLA_VF_LINK_STATE_ENABLE: link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE; break; case IFLA_VF_LINK_STATE_DISABLE: link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN; break; default: mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n", link_state, slave, port); return -EINVAL; }; /* update the admin & oper state on the link state */ s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; s_info->link_state = link_state; vp_oper->state.link_state = link_state; /* send event */ mlx4_gen_port_state_change_eqe(dev, slave, port, link_stat_event); return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); int 
mlx4_get_vf_link_state(struct mlx4_dev *dev, int port, int vf) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *s_info; int slave; if (!mlx4_is_master(dev)) return -EPROTONOSUPPORT; slave = mlx4_get_slave_indx(dev, vf); if (slave < 0) return -EINVAL; s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; return s_info->link_state; } EXPORT_SYMBOL_GPL(mlx4_get_vf_link_state); Index: stable/9/sys/ofed/drivers/net/mlx4/en_main.c =================================================================== --- stable/9/sys/ofed/drivers/net/mlx4/en_main.c (revision 279733) +++ stable/9/sys/ofed/drivers/net/mlx4/en_main.c (revision 279734) @@ -1,351 +1,351 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include #include #include #include #include #include #include #include "mlx4_en.h" MODULE_AUTHOR("Liran Liss, Yevgeny Petrilin"); MODULE_DESCRIPTION("Mellanox ConnectX HCA Ethernet driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION " ("DRV_RELDATE")"); static const char mlx4_en_version[] = DRV_NAME ": Mellanox ConnectX HCA Ethernet driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; #define MLX4_EN_PARM_INT(X, def_val, desc) \ static unsigned int X = def_val;\ module_param(X , uint, 0444); \ MODULE_PARM_DESC(X, desc); /* * Device scope module parameters */ /* Enable RSS UDP traffic */ MLX4_EN_PARM_INT(udp_rss, 1, "Enable RSS for incoming UDP traffic"); /* Priority pausing */ MLX4_EN_PARM_INT(pfctx, 0, "Priority based Flow Control policy on TX[7:0]." " Per priority bit mask"); MLX4_EN_PARM_INT(pfcrx, 0, "Priority based Flow Control policy on RX[7:0]." 
" Per priority bit mask"); #define MAX_PFC_TX 0xff #define MAX_PFC_RX 0xff static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) { struct mlx4_en_profile *params = &mdev->profile; int i; params->udp_rss = udp_rss; params->num_tx_rings_p_up = min_t(int, mp_ncpus, MLX4_EN_MAX_TX_RING_P_UP); if (params->udp_rss && !(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS)) { mlx4_warn(mdev, "UDP RSS is not supported on this device.\n"); params->udp_rss = 0; } for (i = 1; i <= MLX4_MAX_PORTS; i++) { params->prof[i].rx_pause = 1; params->prof[i].rx_ppp = pfcrx; params->prof[i].tx_pause = 1; params->prof[i].tx_ppp = pfctx; params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; params->prof[i].tx_ring_num = params->num_tx_rings_p_up * MLX4_EN_NUM_UP; params->prof[i].rss_rings = 0; } return 0; } static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port) { struct mlx4_en_dev *endev = ctx; return endev->pndev[port]; } static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr, enum mlx4_dev_event event, unsigned long port) { struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr; struct mlx4_en_priv *priv; switch (event) { case MLX4_DEV_EVENT_PORT_UP: case MLX4_DEV_EVENT_PORT_DOWN: if (!mdev->pndev[port]) return; priv = netdev_priv(mdev->pndev[port]); /* To prevent races, we poll the link state in a separate task rather than changing it here */ priv->link_state = event; queue_work(mdev->workqueue, &priv->linkstate_task); break; case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: mlx4_err(mdev, "Internal error detected, restarting device\n"); break; case MLX4_DEV_EVENT_SLAVE_INIT: case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: break; default: if (port < 1 || port > dev->caps.num_ports || !mdev->pndev[port]) return; mlx4_warn(mdev, "Unhandled event %d for port %d\n", event, (int) port); } } static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) { struct mlx4_en_dev *mdev = endev_ptr; int i, ret; 
mutex_lock(&mdev->state_lock); mdev->device_up = false; mutex_unlock(&mdev->state_lock); mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) if (mdev->pndev[i]) mlx4_en_destroy_netdev(mdev->pndev[i]); flush_workqueue(mdev->workqueue); destroy_workqueue(mdev->workqueue); ret = mlx4_mr_free(dev, &mdev->mr); if (ret) mlx4_err(mdev, "Error deregistering MR. The system may have become unstable."); iounmap(mdev->uar_map); mlx4_uar_free(dev, &mdev->priv_uar); mlx4_pd_free(dev, mdev->priv_pdn); kfree(mdev); } static void *mlx4_en_add(struct mlx4_dev *dev) { struct mlx4_en_dev *mdev; int i; int err; printk_once(KERN_INFO "%s", mlx4_en_version); mdev = kzalloc(sizeof *mdev, GFP_KERNEL); if (!mdev) { dev_err(&dev->pdev->dev, "Device struct alloc failed, " "aborting.\n"); err = -ENOMEM; goto err_free_res; } if (mlx4_pd_alloc(dev, &mdev->priv_pdn)) goto err_free_dev; if (mlx4_uar_alloc(dev, &mdev->priv_uar)) goto err_pd; mdev->uar_map = ioremap((phys_addr_t) mdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!mdev->uar_map) goto err_uar; spin_lock_init(&mdev->uar_lock); mdev->dev = dev; mdev->dma_device = &(dev->pdev->dev); mdev->pdev = dev->pdev; mdev->device_up = false; mdev->LSO_support = !!(dev->caps.flags & (1 << 15)); if (!mdev->LSO_support) mlx4_warn(mdev, "LSO not supported, please upgrade to later " "FW version to enable LSO\n"); if (mlx4_mr_alloc(mdev->dev, mdev->priv_pdn, 0, ~0ull, MLX4_PERM_LOCAL_WRITE | MLX4_PERM_LOCAL_READ, 0, 0, &mdev->mr)) { mlx4_err(mdev, "Failed allocating memory region\n"); goto err_map; } if (mlx4_mr_enable(mdev->dev, &mdev->mr)) { mlx4_err(mdev, "Failed enabling memory region\n"); goto err_mr; } /* Build device profile according to supplied module parameters */ err = mlx4_en_get_profile(mdev); if (err) { mlx4_err(mdev, "Bad module parameters, aborting.\n"); goto err_mr; } /* Configure which ports to start according to module parameters */ mdev->port_cnt = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) mdev->port_cnt++; mlx4_foreach_port(i, dev, 
MLX4_PORT_TYPE_ETH) { if (!dev->caps.comp_pool) { mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two(max_t(int, MIN_RX_RINGS, min_t(int, dev->caps.num_comp_vectors, DEF_RX_RINGS))); } else { mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two( - min_t(int, dev->caps.comp_pool/ - dev->caps.num_ports - 1 , MAX_MSIX_P_PORT - 1)); + min_t(int, dev->caps.comp_pool / + dev->caps.num_ports, MAX_MSIX_P_PORT)); } } /* Create our own workqueue for reset/multicast tasks * Note: we cannot use the shared workqueue because of deadlocks caused * by the rtnl lock */ mdev->workqueue = create_singlethread_workqueue("mlx4_en"); if (!mdev->workqueue) { err = -ENOMEM; goto err_mr; } /* At this stage all non-port specific tasks are complete: * mark the card state as up */ mutex_init(&mdev->state_lock); mdev->device_up = true; /* Setup ports */ /* Create a netdev for each port */ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) { mlx4_info(mdev, "Activating port:%d\n", i); if (mlx4_en_init_netdev(mdev, i, &mdev->profile.prof[i])) mdev->pndev[i] = NULL; } return mdev; err_mr: err = mlx4_mr_free(dev, &mdev->mr); if (err) mlx4_err(mdev, "Error deregistering MR. 
The system may have become unstable."); err_map: if (mdev->uar_map) iounmap(mdev->uar_map); err_uar: mlx4_uar_free(dev, &mdev->priv_uar); err_pd: mlx4_pd_free(dev, mdev->priv_pdn); err_free_dev: kfree(mdev); err_free_res: return NULL; } static struct mlx4_interface mlx4_en_interface = { .add = mlx4_en_add, .remove = mlx4_en_remove, .event = mlx4_en_event, .get_dev = mlx4_en_get_netdev, .protocol = MLX4_PROT_ETH, }; static void mlx4_en_verify_params(void) { if (pfctx > MAX_PFC_TX) { pr_warn("mlx4_en: WARNING: illegal module parameter pfctx 0x%x - " "should be in range 0-0x%x, will be changed to default (0)\n", pfctx, MAX_PFC_TX); pfctx = 0; } if (pfcrx > MAX_PFC_RX) { pr_warn("mlx4_en: WARNING: illegal module parameter pfcrx 0x%x - " "should be in range 0-0x%x, will be changed to default (0)\n", pfcrx, MAX_PFC_RX); pfcrx = 0; } } static int __init mlx4_en_init(void) { mlx4_en_verify_params(); #ifdef CONFIG_DEBUG_FS int err = 0; err = mlx4_en_register_debugfs(); if (err) pr_err(KERN_ERR "Failed to register debugfs\n"); #endif return mlx4_register_interface(&mlx4_en_interface); } static void __exit mlx4_en_cleanup(void) { mlx4_unregister_interface(&mlx4_en_interface); #ifdef CONFIG_DEBUG_FS mlx4_en_unregister_debugfs(); #endif } module_init(mlx4_en_init); module_exit(mlx4_en_cleanup); #undef MODULE_VERSION #include static int mlxen_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t mlxen_mod = { .name = "mlxen", .evhand = mlxen_evhand, }; DECLARE_MODULE(mlxen, mlxen_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); MODULE_DEPEND(mlxen, mlx4, 1, 1, 1); Index: stable/9/sys/ofed/drivers/net/mlx4/en_netdev.c =================================================================== --- stable/9/sys/ofed/drivers/net/mlx4/en_netdev.c (revision 279733) +++ stable/9/sys/ofed/drivers/net/mlx4/en_netdev.c (revision 279734) @@ -1,2594 +1,2594 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* */ #include #include #include #ifdef CONFIG_NET_RX_BUSY_POLL #include #endif #include #include #include #include #include #include #include #include #include "mlx4_en.h" #include "en_port.h" static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv); static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv); static int mlx4_en_unit; #ifdef CONFIG_NET_RX_BUSY_POLL /* must be called with local_bh_disable()d */ static int mlx4_en_low_latency_recv(struct napi_struct *napi) { struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; int done; if (!priv->port_up) return LL_FLUSH_FAILED; if (!mlx4_en_cq_lock_poll(cq)) return LL_FLUSH_BUSY; done = mlx4_en_process_rx_cq(dev, cq, 4); #ifdef LL_EXTENDED_STATS if (done) rx_ring->cleaned += done; else rx_ring->misses++; #endif mlx4_en_cq_unlock_poll(cq); return done; } #endif /* CONFIG_NET_RX_BUSY_POLL */ #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { struct list_head next; struct work_struct work; u8 ip_proto; __be32 src_ip; __be32 dst_ip; __be16 src_port; __be16 dst_port; int rxq_index; struct mlx4_en_priv *priv; u32 flow_id; /* RFS infrastructure id */ int id; /* mlx4_en driver id */ u64 reg_id; /* Flow steering API id */ u8 activated; /* Used to prevent expiry before filter * is attached */ struct hlist_node filter_chain; }; static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv); static enum mlx4_net_trans_rule_id mlx4_ip_proto_to_trans_rule_id(u8 ip_proto) { switch (ip_proto) { case IPPROTO_UDP: return MLX4_NET_TRANS_RULE_ID_UDP; case IPPROTO_TCP: return MLX4_NET_TRANS_RULE_ID_TCP; default: return -EPROTONOSUPPORT; } }; static void mlx4_en_filter_work(struct work_struct *work) { struct mlx4_en_filter *filter = container_of(work, struct mlx4_en_filter, work); struct mlx4_en_priv *priv = filter->priv; struct mlx4_spec_list spec_tcp_udp = { .id = 
mlx4_ip_proto_to_trans_rule_id(filter->ip_proto), { .tcp_udp = { .dst_port = filter->dst_port, .dst_port_msk = (__force __be16)-1, .src_port = filter->src_port, .src_port_msk = (__force __be16)-1, }, }, }; struct mlx4_spec_list spec_ip = { .id = MLX4_NET_TRANS_RULE_ID_IPV4, { .ipv4 = { .dst_ip = filter->dst_ip, .dst_ip_msk = (__force __be32)-1, .src_ip = filter->src_ip, .src_ip_msk = (__force __be32)-1, }, }, }; struct mlx4_spec_list spec_eth = { .id = MLX4_NET_TRANS_RULE_ID_ETH, }; struct mlx4_net_trans_rule rule = { .list = LIST_HEAD_INIT(rule.list), .queue_mode = MLX4_NET_TRANS_Q_LIFO, .exclusive = 1, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, .port = priv->port, .priority = MLX4_DOMAIN_RFS, }; int rc; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); if (spec_tcp_udp.id < 0) { en_warn(priv, "RFS: ignoring unsupported ip protocol (%d)\n", filter->ip_proto); goto ignore; } list_add_tail(&spec_eth.list, &rule.list); list_add_tail(&spec_ip.list, &rule.list); list_add_tail(&spec_tcp_udp.list, &rule.list); rule.qpn = priv->rss_map.qps[filter->rxq_index].qpn; memcpy(spec_eth.eth.dst_mac, priv->dev->dev_addr, ETH_ALEN); memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); filter->activated = 0; if (filter->reg_id) { rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); if (rc && rc != -ENOENT) en_err(priv, "Error detaching flow. rc = %d\n", rc); } rc = mlx4_flow_attach(priv->mdev->dev, &rule, &filter->reg_id); if (rc) en_err(priv, "Error attaching flow. 
err = %d\n", rc); ignore: mlx4_en_filter_rfs_expire(priv); filter->activated = 1; } static inline struct hlist_head * filter_hash_bucket(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, __be16 src_port, __be16 dst_port) { unsigned long l; int bucket_idx; l = (__force unsigned long)src_port | ((__force unsigned long)dst_port << 2); l ^= (__force unsigned long)(src_ip ^ dst_ip); bucket_idx = hash_long(l, MLX4_EN_FILTER_HASH_SHIFT); return &priv->filter_hash[bucket_idx]; } static struct mlx4_en_filter * mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip, __be32 dst_ip, u8 ip_proto, __be16 src_port, __be16 dst_port, u32 flow_id) { struct mlx4_en_filter *filter = NULL; filter = kzalloc(sizeof(struct mlx4_en_filter), GFP_ATOMIC); if (!filter) return NULL; filter->priv = priv; filter->rxq_index = rxq_index; INIT_WORK(&filter->work, mlx4_en_filter_work); filter->src_ip = src_ip; filter->dst_ip = dst_ip; filter->ip_proto = ip_proto; filter->src_port = src_port; filter->dst_port = dst_port; filter->flow_id = flow_id; filter->id = priv->last_filter_id++ % RPS_NO_FILTER; list_add_tail(&filter->next, &priv->filters); hlist_add_head(&filter->filter_chain, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port)); return filter; } static void mlx4_en_filter_free(struct mlx4_en_filter *filter) { struct mlx4_en_priv *priv = filter->priv; int rc; list_del(&filter->next); rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); if (rc && rc != -ENOENT) en_err(priv, "Error detaching flow. 
rc = %d\n", rc); kfree(filter); } static inline struct mlx4_en_filter * mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, u8 ip_proto, __be16 src_port, __be16 dst_port) { struct hlist_node *elem; struct mlx4_en_filter *filter; struct mlx4_en_filter *ret = NULL; hlist_for_each_entry(filter, elem, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port), filter_chain) { if (filter->src_ip == src_ip && filter->dst_ip == dst_ip && filter->ip_proto == ip_proto && filter->src_port == src_port && filter->dst_port == dst_port) { ret = filter; break; } } return ret; } static int mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id) { struct mlx4_en_priv *priv = netdev_priv(net_dev); struct mlx4_en_filter *filter; const struct iphdr *ip; const __be16 *ports; u8 ip_proto; __be32 src_ip; __be32 dst_ip; __be16 src_port; __be16 dst_port; int nhoff = skb_network_offset(skb); int ret = 0; if (skb->protocol != htons(ETH_P_IP)) return -EPROTONOSUPPORT; ip = (const struct iphdr *)(skb->data + nhoff); if (ip_is_fragment(ip)) return -EPROTONOSUPPORT; if ((ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP)) return -EPROTONOSUPPORT; ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); ip_proto = ip->protocol; src_ip = ip->saddr; dst_ip = ip->daddr; src_port = ports[0]; dst_port = ports[1]; spin_lock_bh(&priv->filters_lock); filter = mlx4_en_filter_find(priv, src_ip, dst_ip, ip_proto, src_port, dst_port); if (filter) { if (filter->rxq_index == rxq_index) goto out; filter->rxq_index = rxq_index; } else { filter = mlx4_en_filter_alloc(priv, rxq_index, src_ip, dst_ip, ip_proto, src_port, dst_port, flow_id); if (!filter) { ret = -ENOMEM; goto err; } } queue_work(priv->mdev->workqueue, &filter->work); out: ret = filter->id; err: spin_unlock_bh(&priv->filters_lock); return ret; } void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *rx_ring) { struct mlx4_en_filter *filter, 
*tmp; LIST_HEAD(del_list); spin_lock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &priv->filters, next) { list_move(&filter->next, &del_list); hlist_del(&filter->filter_chain); } spin_unlock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &del_list, next) { cancel_work_sync(&filter->work); mlx4_en_filter_free(filter); } } static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv) { struct mlx4_en_filter *filter = NULL, *tmp, *last_filter = NULL; LIST_HEAD(del_list); int i = 0; spin_lock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &priv->filters, next) { if (i > MLX4_EN_FILTER_EXPIRY_QUOTA) break; if (filter->activated && !work_pending(&filter->work) && rps_may_expire_flow(priv->dev, filter->rxq_index, filter->flow_id, filter->id)) { list_move(&filter->next, &del_list); hlist_del(&filter->filter_chain); } else last_filter = filter; i++; } if (last_filter && (&last_filter->next != priv->filters.next)) list_move(&priv->filters, &last_filter->next); spin_unlock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &del_list, next) mlx4_en_filter_free(filter); } #endif static void mlx4_en_vlan_rx_add_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; int idx; if (arg != priv) return; en_dbg(HW, priv, "adding VLAN:%d\n", vid); set_bit(vid, priv->active_vlans); /* Add VID to port VLAN filter */ mutex_lock(&mdev->state_lock); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) en_err(priv, "Failed configuring VLAN filter\n"); } if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) en_dbg(HW, priv, "failed adding vlan %d\n", vid); mutex_unlock(&mdev->state_lock); } static void mlx4_en_vlan_rx_kill_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; if (arg != priv) return; en_dbg(HW, 
priv, "Killing VID:%d\n", vid); clear_bit(vid, priv->active_vlans); /* Remove VID from port VLAN filter */ mutex_lock(&mdev->state_lock); mlx4_unregister_vlan(mdev->dev, priv->port, vid); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) en_err(priv, "Failed configuring VLAN filter\n"); } mutex_unlock(&mdev->state_lock); } static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv, unsigned char *mac, int *qpn, u64 *reg_id) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int err; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_B0: { struct mlx4_qp qp; u8 gid[16] = {0}; qp.qpn = *qpn; memcpy(&gid[10], mac, ETH_ALEN); gid[5] = priv->port; err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); break; } case MLX4_STEERING_MODE_DEVICE_MANAGED: { struct mlx4_spec_list spec_eth = { {NULL} }; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, .priority = MLX4_DOMAIN_NIC, }; rule.port = priv->port; rule.qpn = *qpn; INIT_LIST_HEAD(&rule.list); spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN); memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); list_add_tail(&spec_eth.list, &rule.list); err = mlx4_flow_attach(dev, &rule, reg_id); break; } default: return -EINVAL; } if (err) en_warn(priv, "Failed Attaching Unicast\n"); return err; } static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv, unsigned char *mac, int qpn, u64 reg_id) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_B0: { struct mlx4_qp qp; u8 gid[16] = {0}; qp.qpn = qpn; memcpy(&gid[10], mac, ETH_ALEN); gid[5] = priv->port; mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); break; } case MLX4_STEERING_MODE_DEVICE_MANAGED: { mlx4_flow_detach(dev, reg_id); break; 
} default: en_err(priv, "Invalid steering mode.\n"); } } static int mlx4_en_get_qp(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; struct mlx4_mac_entry *entry; int index = 0; int err = 0; u64 reg_id; int *qpn = &priv->base_qpn; u64 mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); en_dbg(DRV, priv, "Registering MAC: %pM for adding\n", IF_LLADDR(priv->dev)); index = mlx4_register_mac(dev, priv->port, mac); if (index < 0) { err = index; en_err(priv, "Failed adding MAC: %pM\n", IF_LLADDR(priv->dev)); return err; } if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { int base_qpn = mlx4_get_base_qpn(dev, priv->port); *qpn = base_qpn + index; return 0; } err = mlx4_qp_reserve_range(dev, 1, 1, qpn, 0); en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); if (err) { en_err(priv, "Failed to reserve qp for mac registration\n"); goto qp_err; } err = mlx4_en_uc_steer_add(priv, IF_LLADDR(priv->dev), qpn, ®_id); if (err) goto steer_err; entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { err = -ENOMEM; goto alloc_err; } memcpy(entry->mac, IF_LLADDR(priv->dev), sizeof(entry->mac)); entry->reg_id = reg_id; hlist_add_head(&entry->hlist, &priv->mac_hash[entry->mac[MLX4_EN_MAC_HASH_IDX]]); return 0; alloc_err: mlx4_en_uc_steer_release(priv, IF_LLADDR(priv->dev), *qpn, reg_id); steer_err: mlx4_qp_release_range(dev, *qpn, 1); qp_err: mlx4_unregister_mac(dev, priv->port, mac); return err; } static void mlx4_en_put_qp(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int qpn = priv->base_qpn; u64 mac; if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", IF_LLADDR(priv->dev)); mlx4_unregister_mac(dev, priv->port, mac); } else { struct mlx4_mac_entry *entry; struct hlist_node *n, *tmp; struct hlist_head *bucket; unsigned int i; for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) { bucket = 
&priv->mac_hash[i]; hlist_for_each_entry_safe(entry, n, tmp, bucket, hlist) { mac = mlx4_mac_to_u64(entry->mac); en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", entry->mac); mlx4_en_uc_steer_release(priv, entry->mac, qpn, entry->reg_id); mlx4_unregister_mac(dev, priv->port, mac); hlist_del(&entry->hlist); kfree(entry); } } en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n", priv->port, qpn); mlx4_qp_release_range(dev, qpn, 1); priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC; } } static void mlx4_en_clear_list(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_mc_list *tmp, *mc_to_del; list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) { list_del(&mc_to_del->list); kfree(mc_to_del); } } static void mlx4_en_cache_mclist(struct net_device *dev) { struct ifmultiaddr *ifma; struct mlx4_en_mc_list *tmp; struct mlx4_en_priv *priv = netdev_priv(dev); TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != ETHER_ADDR_LEN) continue; /* Make sure the list didn't grow. 
*/ tmp = kzalloc(sizeof(struct mlx4_en_mc_list), GFP_ATOMIC); memcpy(tmp->addr, LLADDR((struct sockaddr_dl *)ifma->ifma_addr), ETH_ALEN); list_add_tail(&tmp->list, &priv->mc_list); } } static void update_mclist_flags(struct mlx4_en_priv *priv, struct list_head *dst, struct list_head *src) { struct mlx4_en_mc_list *dst_tmp, *src_tmp, *new_mc; bool found; /* Find all the entries that should be removed from dst, * These are the entries that are not found in src */ list_for_each_entry(dst_tmp, dst, list) { found = false; list_for_each_entry(src_tmp, src, list) { if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { found = true; break; } } if (!found) dst_tmp->action = MCLIST_REM; } /* Add entries that exist in src but not in dst * mark them as need to add */ list_for_each_entry(src_tmp, src, list) { found = false; list_for_each_entry(dst_tmp, dst, list) { if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { dst_tmp->action = MCLIST_NONE; found = true; break; } } if (!found) { new_mc = kmalloc(sizeof(struct mlx4_en_mc_list), GFP_KERNEL); if (!new_mc) { en_err(priv, "Failed to allocate current multicast list\n"); return; } memcpy(new_mc, src_tmp, sizeof(struct mlx4_en_mc_list)); new_mc->action = MCLIST_ADD; list_add_tail(&new_mc->list, dst); } } } static void mlx4_en_set_rx_mode(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); if (!priv->port_up) return; queue_work(priv->mdev->workqueue, &priv->rx_mode_task); } static void mlx4_en_set_promisc_mode(struct mlx4_en_priv *priv, struct mlx4_en_dev *mdev) { int err = 0; if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) { priv->flags |= MLX4_EN_FLAG_PROMISC; /* Enable promiscouos mode */ switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_add(mdev->dev, priv->port, priv->base_qpn, MLX4_FS_ALL_DEFAULT); if (err) en_err(priv, "Failed enabling promiscuous mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; break; case MLX4_STEERING_MODE_B0: err = 
mlx4_unicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed enabling unicast promiscuous mode\n"); /* Add the default qp number as multicast * promisc */ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed enabling multicast promiscuous mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; } break; case MLX4_STEERING_MODE_A0: err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 1); if (err) en_err(priv, "Failed enabling promiscuous mode\n"); break; } /* Disable port multicast filter (unconditionally) */ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); } } static void mlx4_en_clear_promisc_mode(struct mlx4_en_priv *priv, struct mlx4_en_dev *mdev) { int err = 0; priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_ALL_DEFAULT); if (err) en_err(priv, "Failed disabling promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; break; case MLX4_STEERING_MODE_B0: err = mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed disabling unicast promiscuous mode\n"); /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed disabling multicast promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } break; case MLX4_STEERING_MODE_A0: err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) en_err(priv, "Failed disabling promiscuous mode\n"); break; } } static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, struct net_device *dev, struct mlx4_en_dev *mdev) { struct mlx4_en_mc_list 
*mclist, *tmp; u8 mc_list[16] = {0}; int err = 0; u64 mcast_addr = 0; /* Enable/disable the multicast filter according to IFF_ALLMULTI */ if (dev->if_flags & IFF_ALLMULTI) { err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); /* Add the default qp number as multicast promisc */ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_add(mdev->dev, priv->port, priv->base_qpn, MLX4_FS_MC_DEFAULT); break; case MLX4_STEERING_MODE_B0: err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); break; case MLX4_STEERING_MODE_A0: break; } if (err) en_err(priv, "Failed entering multicast promisc mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; } } else { /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_MC_DEFAULT); break; case MLX4_STEERING_MODE_B0: err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); break; case MLX4_STEERING_MODE_A0: break; } if (err) en_err(priv, "Failed disabling multicast promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); /* Flush mcast filter and init it with broadcast address */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST, 1, MLX4_MCAST_CONFIG); /* Update multicast list - we cache all addresses so they won't * change while HW is updated holding the command semaphor */ mlx4_en_cache_mclist(dev); list_for_each_entry(mclist, &priv->mc_list, list) { mcast_addr = mlx4_mac_to_u64(mclist->addr); mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, mcast_addr, 0, MLX4_MCAST_CONFIG); } err = 
mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_ENABLE); if (err) en_err(priv, "Failed enabling multicast filter\n"); update_mclist_flags(priv, &priv->curr_list, &priv->mc_list); list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { if (mclist->action == MCLIST_REM) { /* detach this address and delete from list */ memcpy(&mc_list[10], mclist->addr, ETH_ALEN); mc_list[5] = priv->port; err = mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, mclist->reg_id); if (err) en_err(priv, "Fail to detach multicast address\n"); /* remove from list */ list_del(&mclist->list); kfree(mclist); } else if (mclist->action == MCLIST_ADD) { /* attach the address */ memcpy(&mc_list[10], mclist->addr, ETH_ALEN); /* needed for B0 steering support */ mc_list[5] = priv->port; err = mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, &mclist->reg_id); if (err) en_err(priv, "Fail to attach multicast address\n"); } } } } static void mlx4_en_do_set_rx_mode(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, rx_mode_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_dbg(HW, priv, "Card is not up, ignoring rx mode change.\n"); goto out; } if (!priv->port_up) { en_dbg(HW, priv, "Port is down, ignoring rx mode change.\n"); goto out; } if (!mlx4_en_QUERY_PORT(mdev, priv->port)) { if (priv->port_state.link_state) { priv->last_link_state = MLX4_DEV_EVENT_PORT_UP; /* Important note: the following call for if_link_state_change * is needed for interface up scenario (start port, link state * change) */ /* update netif baudrate */ priv->dev->if_baudrate = IF_Mbps(priv->port_state.link_speed); if_link_state_change(priv->dev, LINK_STATE_UP); en_dbg(HW, priv, "Link Up\n"); } } /* Promsicuous mode: disable all filters */ if ((dev->if_flags & IFF_PROMISC) || (priv->flags & 
MLX4_EN_FLAG_FORCE_PROMISC)) { mlx4_en_set_promisc_mode(priv, mdev); goto out; } /* Not in promiscuous mode */ if (priv->flags & MLX4_EN_FLAG_PROMISC) mlx4_en_clear_promisc_mode(priv, mdev); mlx4_en_do_multicast(priv, dev, mdev); out: mutex_unlock(&mdev->state_lock); } #ifdef CONFIG_NET_POLL_CONTROLLER static void mlx4_en_netpoll(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_cq *cq; unsigned long flags; int i; for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; spin_lock_irqsave(&cq->lock, flags); napi_synchronize(&cq->napi); mlx4_en_process_rx_cq(dev, cq, 0); spin_unlock_irqrestore(&cq->lock, flags); } } #endif static void mlx4_en_watchdog_timeout(void *arg) { struct mlx4_en_priv *priv = arg; struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Scheduling watchdog\n"); queue_work(mdev->workqueue, &priv->watchdog_task); if (priv->port_up) callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); } static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) { struct mlx4_en_cq *cq; int i; /* If we haven't received a specific coalescing setting * (module param), we set the moderation parameters as follows: * - moder_cnt is set to the number of mtu sized packets to * satisfy our coelsing target. * - moder_time is set to a fixed value. 
*/ priv->rx_frames = MLX4_EN_RX_COAL_TARGET / priv->dev->if_mtu + 1; priv->rx_usecs = MLX4_EN_RX_COAL_TIME; priv->tx_frames = MLX4_EN_TX_COAL_PKTS; priv->tx_usecs = MLX4_EN_TX_COAL_TIME; en_dbg(INTR, priv, "Default coalesing params for mtu: %u - " "rx_frames:%d rx_usecs:%d\n", (unsigned)priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs); /* Setup cq moderation params */ for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; cq->moder_cnt = priv->rx_frames; cq->moder_time = priv->rx_usecs; priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; priv->last_moder_packets[i] = 0; priv->last_moder_bytes[i] = 0; } for (i = 0; i < priv->tx_ring_num; i++) { cq = priv->tx_cq[i]; cq->moder_cnt = priv->tx_frames; cq->moder_time = priv->tx_usecs; } /* Reset auto-moderation params */ priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW; priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW; priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH; priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH; priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL; priv->adaptive_rx_coal = 1; priv->last_moder_jiffies = 0; priv->last_moder_tx_packets = 0; } static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) { unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies); struct mlx4_en_cq *cq; unsigned long packets; unsigned long rate; unsigned long avg_pkt_size; unsigned long rx_packets; unsigned long rx_bytes; unsigned long rx_pkt_diff; int moder_time; int ring, err; if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ) return; for (ring = 0; ring < priv->rx_ring_num; ring++) { spin_lock(&priv->stats_lock); rx_packets = priv->rx_ring[ring]->packets; rx_bytes = priv->rx_ring[ring]->bytes; spin_unlock(&priv->stats_lock); rx_pkt_diff = ((unsigned long) (rx_packets - priv->last_moder_packets[ring])); packets = rx_pkt_diff; rate = packets * HZ / period; avg_pkt_size = packets ? 
((unsigned long) (rx_bytes - priv->last_moder_bytes[ring])) / packets : 0; /* Apply auto-moderation only when packet rate * exceeds a rate that it matters */ if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) && avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) { if (rate < priv->pkt_rate_low) moder_time = priv->rx_usecs_low; else if (rate > priv->pkt_rate_high) moder_time = priv->rx_usecs_high; else moder_time = (rate - priv->pkt_rate_low) * (priv->rx_usecs_high - priv->rx_usecs_low) / (priv->pkt_rate_high - priv->pkt_rate_low) + priv->rx_usecs_low; } else { moder_time = priv->rx_usecs_low; } if (moder_time != priv->last_moder_time[ring]) { priv->last_moder_time[ring] = moder_time; cq = priv->rx_cq[ring]; cq->moder_time = moder_time; err = mlx4_en_set_cq_moder(priv, cq); if (err) en_err(priv, "Failed modifying moderation for cq:%d\n", ring); } priv->last_moder_packets[ring] = rx_packets; priv->last_moder_bytes[ring] = rx_bytes; } priv->last_moder_jiffies = jiffies; } static void mlx4_en_do_get_stats(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, stats_task); struct mlx4_en_dev *mdev = priv->mdev; int err; mutex_lock(&mdev->state_lock); if (mdev->device_up) { if (priv->port_up) { err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); if (err) en_dbg(HW, priv, "Could not update stats\n"); mlx4_en_auto_moderation(priv); } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); } mutex_unlock(&mdev->state_lock); } /* mlx4_en_service_task - Run service task for tasks that needed to be done * periodically */ static void mlx4_en_service_task(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, service_task); struct mlx4_en_dev *mdev = priv->mdev; mutex_lock(&mdev->state_lock); if (mdev->device_up) { queue_delayed_work(mdev->workqueue, &priv->service_task, 
SERVICE_TASK_DELAY); } mutex_unlock(&mdev->state_lock); } static void mlx4_en_linkstate(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, linkstate_task); struct mlx4_en_dev *mdev = priv->mdev; int linkstate = priv->link_state; mutex_lock(&mdev->state_lock); /* If observable port state changed set carrier state and * report to system log */ if (priv->last_link_state != linkstate) { if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) { en_info(priv, "Link Down\n"); if_link_state_change(priv->dev, LINK_STATE_DOWN); /* update netif baudrate */ priv->dev->if_baudrate = 0; /* make sure the port is up before notifying the OS. * This is tricky since we get here on INIT_PORT and * in such case we can't tell the OS the port is up. * To solve this there is a call to if_link_state_change * in set_rx_mode. * */ } else if (priv->port_up && (linkstate == MLX4_DEV_EVENT_PORT_UP)){ if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) en_info(priv, "Query port failed\n"); priv->dev->if_baudrate = IF_Mbps(priv->port_state.link_speed); en_info(priv, "Link Up\n"); if_link_state_change(priv->dev, LINK_STATE_UP); } } priv->last_link_state = linkstate; mutex_unlock(&mdev->state_lock); } int mlx4_en_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq; struct mlx4_en_tx_ring *tx_ring; int rx_index = 0; int tx_index = 0; int err = 0; int i; int j; u8 mc_list[16] = {0}; if (priv->port_up) { en_dbg(DRV, priv, "start port called while port already up\n"); return 0; } INIT_LIST_HEAD(&priv->mc_list); INIT_LIST_HEAD(&priv->curr_list); INIT_LIST_HEAD(&priv->ethtool_list); /* Calculate Rx buf size */ dev->if_mtu = min(dev->if_mtu, priv->max_mtu); mlx4_en_calc_rx_buf(dev); priv->rx_alloc_size = max_t(int, 2 * roundup_pow_of_two(priv->rx_mb_size), PAGE_SIZE); priv->rx_alloc_order = get_order(priv->rx_alloc_size); priv->rx_buf_size = roundup_pow_of_two(priv->rx_mb_size); 
priv->log_rx_info = ROUNDUP_LOG2(sizeof(struct mlx4_en_rx_buf)); en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_mb_size); /* Configure rx cq's and rings */ err = mlx4_en_activate_rx_rings(priv); if (err) { en_err(priv, "Failed to activate RX rings\n"); return err; } for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; mlx4_en_cq_init_lock(cq); err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Rx CQ\n"); goto cq_err; } for (j = 0; j < cq->size; j++) cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK; err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto cq_err; } mlx4_en_arm_cq(priv, cq); priv->rx_ring[i]->cqn = cq->mcq.cqn; ++rx_index; } /* Set qp number */ en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port); err = mlx4_en_get_qp(priv); if (err) { en_err(priv, "Failed getting eth qp\n"); goto cq_err; } mdev->mac_removed[priv->port] = 0; /* gets default allocated counter index from func cap */ /* or sink counter index if no resources */ priv->counter_index = mdev->dev->caps.def_counter_index[priv->port - 1]; en_dbg(DRV, priv, "%s: default counter index %d for port %d\n", __func__, priv->counter_index, priv->port); err = mlx4_en_config_rss_steer(priv); if (err) { en_err(priv, "Failed configuring rss steering\n"); goto mac_err; } err = mlx4_en_create_drop_qp(priv); if (err) goto rss_err; /* Configure tx cq's and rings */ for (i = 0; i < priv->tx_ring_num; i++) { /* Configure cq */ cq = priv->tx_cq[i]; err = mlx4_en_activate_cq(priv, cq, i); if (err) { - en_err(priv, "Failed allocating Tx CQ\n"); + en_err(priv, "Failed activating Tx CQ\n"); goto tx_err; } err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } en_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i); cq->buf->wqe_index = cpu_to_be16(0xffff); /* Configure 
ring */ tx_ring = priv->tx_ring[i]; err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn, i / priv->num_tx_rings_p_up); if (err) { - en_err(priv, "Failed allocating Tx ring\n"); + en_err(priv, "Failed activating Tx ring %d\n", i); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } /* Arm CQ for TX completions */ mlx4_en_arm_cq(priv, cq); /* Set initial ownership of all Tx TXBBs to SW (1) */ for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) *((u32 *) (tx_ring->buf + j)) = 0xffffffff; ++tx_index; } /* Configure port */ err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_mb_size, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); if (err) { en_err(priv, "Failed setting port general configurations for port %d, with error %d\n", priv->port, err); goto tx_err; } /* Set default qp number */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) { en_err(priv, "Failed setting default qp numbers\n"); goto tx_err; } /* Init port */ en_dbg(HW, priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { en_err(priv, "Failed Initializing port\n"); goto tx_err; } /* Attach rx QP to bradcast address */ memset(&mc_list[10], 0xff, ETH_ALEN); mc_list[5] = priv->port; /* needed for B0 steering support */ if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, &priv->broadcast_id)) mlx4_warn(mdev, "Failed Attaching Broadcast\n"); /* Must redo promiscuous mode setup. */ priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->rx_mode_task); mlx4_set_stats_bitmap(mdev->dev, priv->stats_bitmap); priv->port_up = true; /* Enable the queues. 
*/ dev->if_drv_flags &= ~IFF_DRV_OACTIVE; dev->if_drv_flags |= IFF_DRV_RUNNING; #ifdef CONFIG_DEBUG_FS mlx4_en_create_debug_files(priv); #endif callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); return 0; tx_err: while (tx_index--) { mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[tx_index]); mlx4_en_deactivate_cq(priv, priv->tx_cq[tx_index]); } mlx4_en_destroy_drop_qp(priv); rss_err: mlx4_en_release_rss_steer(priv); mac_err: mlx4_en_put_qp(priv); cq_err: while (rx_index--) mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]); for (i = 0; i < priv->rx_ring_num; i++) mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); return err; /* need to close devices */ } void mlx4_en_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_mc_list *mclist, *tmp; int i; u8 mc_list[16] = {0}; if (!priv->port_up) { en_dbg(DRV, priv, "stop port called while port already down\n"); return; } #ifdef CONFIG_DEBUG_FS mlx4_en_delete_debug_files(priv); #endif /* close port*/ mlx4_CLOSE_PORT(mdev->dev, priv->port); /* Set port as not active */ priv->port_up = false; if (priv->counter_index != 0xff) { mlx4_counter_free(mdev->dev, priv->port, priv->counter_index); priv->counter_index = 0xff; } /* Promsicuous mode */ if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_ALL_DEFAULT); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_MC_DEFAULT); } else if (priv->flags & MLX4_EN_FLAG_PROMISC) { priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } } 
/* Detach All multicasts */ memset(&mc_list[10], 0xff, ETH_ALEN); mc_list[5] = priv->port; /* needed for B0 steering support */ mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, priv->broadcast_id); list_for_each_entry(mclist, &priv->curr_list, list) { memcpy(&mc_list[10], mclist->addr, ETH_ALEN); mc_list[5] = priv->port; mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, mclist->reg_id); } mlx4_en_clear_list(dev); list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { list_del(&mclist->list); kfree(mclist); } /* Flush multicast filter */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG); mlx4_en_destroy_drop_qp(priv); /* Free TX Rings */ for (i = 0; i < priv->tx_ring_num; i++) { mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[i]); mlx4_en_deactivate_cq(priv, priv->tx_cq[i]); } msleep(10); for (i = 0; i < priv->tx_ring_num; i++) mlx4_en_free_tx_buf(dev, priv->tx_ring[i]); /* Free RSS qps */ mlx4_en_release_rss_steer(priv); /* Unregister Mac address for the port */ mlx4_en_put_qp(priv); mdev->mac_removed[priv->port] = 1; /* Free RX Rings */ for (i = 0; i < priv->rx_ring_num; i++) { struct mlx4_en_cq *cq = priv->rx_cq[i]; mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); } callout_stop(&priv->watchdog_timer); dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } static void mlx4_en_restart(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, watchdog_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; struct mlx4_en_tx_ring *ring; int i; if (priv->blocked == 0 || priv->port_up == 0) return; for (i = 0; i < priv->tx_ring_num; i++) { ring = priv->tx_ring[i]; if (ring->blocked && ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks) goto reset; } return; reset: priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); 
mutex_lock(&mdev->state_lock); if (priv->port_up) { mlx4_en_stop_port(dev); //for (i = 0; i < priv->tx_ring_num; i++) // netdev_tx_reset_queue(priv->tx_ring[i]->tx_queue); if (mlx4_en_start_port(dev)) en_err(priv, "Failed restarting port %d\n", priv->port); } mutex_unlock(&mdev->state_lock); } static void mlx4_en_clear_stats(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int i; if (!mlx4_is_slave(mdev->dev)) if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) en_dbg(HW, priv, "Failed dumping statistics\n"); memset(&priv->pstats, 0, sizeof(priv->pstats)); memset(&priv->pkstats, 0, sizeof(priv->pkstats)); memset(&priv->port_stats, 0, sizeof(priv->port_stats)); memset(&priv->vport_stats, 0, sizeof(priv->vport_stats)); for (i = 0; i < priv->tx_ring_num; i++) { priv->tx_ring[i]->bytes = 0; priv->tx_ring[i]->packets = 0; priv->tx_ring[i]->tx_csum = 0; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->bytes = 0; priv->rx_ring[i]->packets = 0; priv->rx_ring[i]->csum_ok = 0; priv->rx_ring[i]->csum_none = 0; } } static void mlx4_en_open(void* arg) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; struct net_device *dev; int err = 0; priv = arg; mdev = priv->mdev; dev = priv->dev; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_err(priv, "Cannot open - device down/disabled\n"); goto out; } /* Reset HW statistics and SW counters */ mlx4_en_clear_stats(dev); err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port:%d\n", priv->port); out: mutex_unlock(&mdev->state_lock); return; } void mlx4_en_free_resources(struct mlx4_en_priv *priv) { int i; #ifdef CONFIG_RFS_ACCEL if (priv->dev->rx_cpu_rmap) { free_irq_cpu_rmap(priv->dev->rx_cpu_rmap); priv->dev->rx_cpu_rmap = NULL; } #endif for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring && priv->tx_ring[i]) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if (priv->tx_cq && priv->tx_cq[i]) mlx4_en_destroy_cq(priv, 
&priv->tx_cq[i]); } for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i]) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], priv->prof->rx_ring_size, priv->stride); if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } if (priv->sysctl) sysctl_ctx_free(&priv->stat_ctx); } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; int i; int node = 0; /* Create rx Rings */ for (i = 0; i < priv->rx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->rx_cq[i], prof->rx_ring_size, i, RX, node)) goto err; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size, node)) goto err; } /* Create tx Rings */ for (i = 0; i < priv->tx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->tx_cq[i], prof->tx_ring_size, i, TX, node)) goto err; if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], prof->tx_ring_size, TXBB_SIZE, node, i)) goto err; } #ifdef CONFIG_RFS_ACCEL priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->rx_ring_num); if (!priv->dev->rx_cpu_rmap) goto err; #endif /* Re-create stat sysctls in case the number of rings changed. 
*/ mlx4_en_sysctl_stat(priv); return 0; err: en_err(priv, "Failed to allocate NIC resources\n"); for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i]) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size, priv->stride); if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring[i]) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if (priv->tx_cq[i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } priv->port_up = false; return -ENOMEM; } struct en_port_attribute { struct attribute attr; ssize_t (*show)(struct en_port *, struct en_port_attribute *, char *buf); ssize_t (*store)(struct en_port *, struct en_port_attribute *, char *buf, size_t count); }; #define PORT_ATTR_RO(_name) \ struct en_port_attribute en_port_attr_##_name = __ATTR_RO(_name) #define EN_PORT_ATTR(_name, _mode, _show, _store) \ struct en_port_attribute en_port_attr_##_name = __ATTR(_name, _mode, _show, _store) void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); if (priv->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* Unregister device - this will close the port if it was up */ if (priv->registered) ether_ifdetach(dev); if (priv->allocated) mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); mutex_lock(&mdev->state_lock); mlx4_en_stop_port(dev); mutex_unlock(&mdev->state_lock); cancel_delayed_work(&priv->stats_task); cancel_delayed_work(&priv->service_task); /* flush any pending task for this netdev */ flush_workqueue(mdev->workqueue); callout_drain(&priv->watchdog_timer); /* Detach the netdev so tasks would not attempt to access it */ mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; mutex_unlock(&mdev->state_lock); 
mlx4_en_free_resources(priv); /* freeing the sysctl conf cannot be called from within mlx4_en_free_resources */ if (priv->sysctl) sysctl_ctx_free(&priv->conf_ctx); kfree(priv->tx_ring); kfree(priv->tx_cq); kfree(priv); if_free(dev); } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err = 0; en_dbg(DRV, priv, "Change MTU called - current:%u new:%u\n", (unsigned)dev->if_mtu, (unsigned)new_mtu); if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) { en_err(priv, "Bad MTU size:%d.\n", new_mtu); return -EPERM; } mutex_lock(&mdev->state_lock); dev->if_mtu = new_mtu; if (dev->if_drv_flags & IFF_DRV_RUNNING) { if (!mdev->device_up) { /* NIC is probably restarting - let watchdog task reset * * the port */ en_dbg(DRV, priv, "Change MTU called with card down!?\n"); } else { mlx4_en_stop_port(dev); err = mlx4_en_start_port(dev); if (err) { en_err(priv, "Failed restarting port:%d\n", priv->port); queue_work(mdev->workqueue, &priv->watchdog_task); } } } mutex_unlock(&mdev->state_lock); return 0; } static int mlx4_en_calc_media(struct mlx4_en_priv *priv) { int trans_type; int active; active = IFM_ETHER; if (priv->last_link_state == MLX4_DEV_EVENT_PORT_DOWN) return (active); /* * [ShaharK] mlx4_en_QUERY_PORT sleeps and cannot be called under a * non-sleepable lock. * I moved it to the periodic mlx4_en_do_get_stats. if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) return (active); */ active |= IFM_FDX; trans_type = priv->port_state.transciver; /* XXX I don't know all of the transceiver values. 
*/ switch (priv->port_state.link_speed) { case 1000: active |= IFM_1000_T; break; case 10000: if (trans_type > 0 && trans_type <= 0xC) active |= IFM_10G_SR; else if (trans_type == 0x80 || trans_type == 0) active |= IFM_10G_CX4; break; case 40000: active |= IFM_40G_CR4; break; } if (priv->prof->tx_pause) active |= IFM_ETH_TXPAUSE; if (priv->prof->rx_pause) active |= IFM_ETH_RXPAUSE; return (active); } static void mlx4_en_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx4_en_priv *priv; priv = dev->if_softc; ifmr->ifm_status = IFM_AVALID; if (priv->last_link_state != MLX4_DEV_EVENT_PORT_DOWN) ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active = mlx4_en_calc_media(priv); return; } static int mlx4_en_media_change(struct ifnet *dev) { struct mlx4_en_priv *priv; struct ifmedia *ifm; int rxpause; int txpause; int error; priv = dev->if_softc; ifm = &priv->media; rxpause = txpause = 0; error = 0; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; case IFM_10G_SR: case IFM_10G_CX4: case IFM_1000_T: case IFM_40G_CR4: if ((IFM_SUBTYPE(ifm->ifm_media) == IFM_SUBTYPE(mlx4_en_calc_media(priv))) && (ifm->ifm_media & IFM_FDX)) break; /* Fallthrough */ default: printf("%s: Only auto media type\n", if_name(dev)); return (EINVAL); } /* Allow user to set/clear pause */ if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE) rxpause = 1; if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE) txpause = 1; if (priv->prof->tx_pause != txpause || priv->prof->rx_pause != rxpause) { priv->prof->tx_pause = txpause; priv->prof->rx_pause = rxpause; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); } return (error); } static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; struct ifreq *ifr; int error; int mask; error = 
0; mask = 0; priv = dev->if_softc; mdev = priv->mdev; ifr = (struct ifreq *) data; switch (command) { case SIOCSIFMTU: error = -mlx4_en_change_mtu(dev, ifr->ifr_mtu); break; case SIOCSIFFLAGS: mutex_lock(&mdev->state_lock); if (dev->if_flags & IFF_UP) { if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) mlx4_en_start_port(dev); else mlx4_en_set_rx_mode(dev); } else { if (dev->if_drv_flags & IFF_DRV_RUNNING) { mlx4_en_stop_port(dev); if_link_state_change(dev, LINK_STATE_DOWN); } } mutex_unlock(&mdev->state_lock); break; case SIOCADDMULTI: case SIOCDELMULTI: mlx4_en_set_rx_mode(dev); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(dev, ifr, &priv->media, command); break; case SIOCSIFCAP: mutex_lock(&mdev->state_lock); mask = ifr->ifr_reqcap ^ dev->if_capenable; if (mask & IFCAP_HWCSUM) dev->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) dev->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) dev->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_LRO) dev->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) dev->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) dev->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_WOL_MAGIC) dev->if_capenable ^= IFCAP_WOL_MAGIC; if (dev->if_drv_flags & IFF_DRV_RUNNING) mlx4_en_start_port(dev); mutex_unlock(&mdev->state_lock); VLAN_CAPABILITIES(dev); break; default: error = ether_ioctl(dev, command, data); break; } return (error); } int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof) { struct net_device *dev; struct mlx4_en_priv *priv; uint8_t dev_addr[ETHER_ADDR_LEN]; int err; int i; priv = kzalloc(sizeof(*priv), GFP_KERNEL); dev = priv->dev = if_alloc(IFT_ETHER); if (dev == NULL) { en_err(priv, "Net device allocation failed\n"); kfree(priv); return -ENOMEM; } dev->if_softc = priv; if_initname(dev, "mlxen", atomic_fetchadd_int(&mlx4_en_unit, 1)); dev->if_mtu = ETHERMTU; dev->if_init = mlx4_en_open; dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | 
IFF_MULTICAST; dev->if_ioctl = mlx4_en_ioctl; dev->if_transmit = mlx4_en_transmit; dev->if_qflush = mlx4_en_qflush; dev->if_snd.ifq_maxlen = prof->tx_ring_size; /* * Initialize driver private data */ priv->counter_index = 0xff; spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); INIT_WORK(&priv->watchdog_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); callout_init(&priv->watchdog_timer, 1); #ifdef CONFIG_RFS_ACCEL INIT_LIST_HEAD(&priv->filters); spin_lock_init(&priv->filters_lock); #endif priv->msg_enable = MLX4_EN_MSG_LEVEL; priv->dev = dev; priv->mdev = mdev; priv->ddev = &mdev->pdev->dev; priv->prof = prof; priv->port = port; priv->port_up = false; priv->flags = prof->flags; priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | MLX4_WQE_CTRL_SOLICITED); priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; priv->tx_ring_num = prof->tx_ring_num; priv->tx_ring = kcalloc(MAX_TX_RINGS, sizeof(struct mlx4_en_tx_ring *), GFP_KERNEL); if (!priv->tx_ring) { err = -ENOMEM; goto out; } priv->tx_cq = kcalloc(sizeof(struct mlx4_en_cq *), MAX_TX_RINGS, GFP_KERNEL); if (!priv->tx_cq) { err = -ENOMEM; goto out; } priv->rx_ring_num = prof->rx_ring_num; priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 
1 : 0; priv->mac_index = -1; priv->last_ifq_jiffies = 0; priv->if_counters_rx_errors = 0; priv->if_counters_rx_no_buffer = 0; #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { priv->dcbx_cap = DCB_CAP_DCBX_HOST; priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { dev->dcbnl_ops = &mlx4_en_dcbnl_ops; } else { en_info(priv, "QoS disabled - no HW support\n"); dev->dcbnl_ops = &mlx4_en_dcbnl_pfc_ops; } } #endif for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) INIT_HLIST_HEAD(&priv->mac_hash[i]); /* Query for default mac and max mtu */ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; priv->mac = mdev->dev->caps.def_mac[priv->port]; if (ILLEGAL_MAC(priv->mac)) { #if BITS_PER_LONG == 64 en_err(priv, "Port: %d, invalid mac burned: 0x%lx, quiting\n", priv->port, priv->mac); #elif BITS_PER_LONG == 32 en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n", priv->port, priv->mac); #endif err = -EINVAL; goto out; } priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + DS_SIZE); mlx4_en_sysctl_conf(priv); err = mlx4_en_alloc_resources(priv); if (err) goto out; /* Allocate page for receive rings */ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); if (err) { en_err(priv, "Failed to allocate page for rx qps\n"); goto out; } priv->allocated = 1; /* * Set driver features */ dev->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM; dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; dev->if_capabilities |= IFCAP_LRO; if (mdev->LSO_support) dev->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTSO; /* set TSO limits so that we don't have to drop TX packets */ dev->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); dev->if_hw_tsomaxsegcount = 16; dev->if_hw_tsomaxsegsize = 65536; /* XXX can do up to 4GByte */ 
dev->if_capenable = dev->if_capabilities; dev->if_hwassist = 0; if (dev->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6)) dev->if_hwassist |= CSUM_TSO; if (dev->if_capenable & IFCAP_TXCSUM) dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); mdev->pndev[priv->port] = dev; priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN; mlx4_en_set_default_moderation(priv); /* Set default MAC */ for (i = 0; i < ETHER_ADDR_LEN; i++) dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i)); ether_ifattach(dev, dev_addr); if_link_state_change(dev, LINK_STATE_DOWN); ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, mlx4_en_media_change, mlx4_en_media_status); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_40G_CR4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); priv->registered = 1; en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); priv->rx_mb_size = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_mb_size, prof->tx_pause, prof->tx_ppp, prof->rx_pause, prof->rx_ppp); if (err) { en_err(priv, "Failed setting port general configurations " "for port %d, with error %d\n", priv->port, err); goto out; } /* Init port */ en_warn(priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { 
en_err(priv, "Failed Initializing port\n"); goto out; } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); return 0; out: mlx4_en_destroy_netdev(dev); return err; } + static int mlx4_en_set_ring_size(struct net_device *dev, int rx_size, int tx_size) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int err = 0; rx_size = roundup_pow_of_two(rx_size); rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); tx_size = roundup_pow_of_two(tx_size); tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size) && tx_size == priv->tx_ring[0]->size) return 0; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev); } mlx4_en_free_resources(priv); priv->prof->tx_ring_size = tx_size; priv->prof->rx_ring_size = rx_size; err = mlx4_en_alloc_resources(priv); if (err) { en_err(priv, "Failed reallocating port resources\n"); goto out; } if (port_up) { err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port\n"); } out: mutex_unlock(&mdev->state_lock); return err; } static int mlx4_en_set_rx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->rx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, size, priv->prof->tx_ring_size); return (error); } static int mlx4_en_set_tx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->tx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = 
-mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size, size); return (error); } static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int ppp; int error; priv = arg1; ppp = priv->prof->tx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); priv->prof->tx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; int ppp; int error; int port_up; port_up = 0; priv = arg1; mdev = priv->mdev; ppp = priv->prof->rx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); /* See if we have to change the number of tx queues. */ if (!ppp != !priv->prof->rx_ppp) { mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(priv->dev); } mlx4_en_free_resources(priv); priv->prof->rx_ppp = ppp; error = -mlx4_en_alloc_resources(priv); if (error) en_err(priv, "Failed reallocating port resources\n"); if (error == 0 && port_up) { error = -mlx4_en_start_port(priv->dev); if (error) en_err(priv, "Failed starting port\n"); } mutex_unlock(&mdev->state_lock); return (error); } priv->prof->rx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv) { struct net_device *dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct sysctl_oid_list *node_list; struct sysctl_oid *coal; struct sysctl_oid_list *coal_list; const char *pnameunit; dev = priv->dev; ctx = &priv->conf_ctx; pnameunit = 
device_get_nameunit(priv->mdev->pdev->dev.bsddev); sysctl_ctx_init(ctx); priv->sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet"); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO, "conf", CTLFLAG_RD, NULL, "Configuration"); node_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable", CTLFLAG_RW, &priv->msg_enable, 0, "Driver message enable bitfield"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings", CTLFLAG_RD, &priv->rx_ring_num, 0, "Number of receive rings"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings", CTLFLAG_RD, &priv->tx_ring_num, 0, "Number of transmit rings"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ring_size, "I", "Receive ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ring_size, "I", "Transmit ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ppp, "I", "TX Per-priority pause"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ppp, "I", "RX Per-priority pause"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "port_num", CTLFLAG_RD, &priv->port, 0, "Port Number"); SYSCTL_ADD_STRING(ctx, node_list, OID_AUTO, "device_name", CTLFLAG_RD, __DECONST(void *, pnameunit), 0, "PCI device name"); /* Add coalescer configuration. 
*/ coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration"); coal_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low", CTLFLAG_RW, &priv->pkt_rate_low, 0, "Packets per-second for minimum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low", CTLFLAG_RW, &priv->rx_usecs_low, 0, "Minimum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high", CTLFLAG_RW, &priv->pkt_rate_high, 0, "Packets per-second for maximum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high", CTLFLAG_RW, &priv->rx_usecs_high, 0, "Maximum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval", CTLFLAG_RW, &priv->sample_interval, 0, "adaptive frequency in units of HZ ticks"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal", CTLFLAG_RW, &priv->adaptive_rx_coal, 0, "Enable adaptive rx coalescing"); } - static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) { struct net_device *dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct sysctl_oid_list *node_list; struct sysctl_oid *ring_node; struct sysctl_oid_list *ring_list; struct mlx4_en_tx_ring *tx_ring; struct mlx4_en_rx_ring *rx_ring; char namebuf[128]; int i; dev = priv->dev; ctx = &priv->stat_ctx; sysctl_ctx_init(ctx); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO, "stat", CTLFLAG_RD, NULL, "Statistics"); node_list = SYSCTL_CHILDREN(node); #ifdef MLX4_EN_PERF_STAT SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_poll", CTLFLAG_RD, &priv->pstats.tx_poll, "TX Poll calls"); SYSCTL_ADD_QUAD(ctx, node_list, OID_AUTO, "tx_pktsz_avg", CTLFLAG_RD, &priv->pstats.tx_pktsz_avg, "TX average packet size"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "inflight_avg", CTLFLAG_RD, &priv->pstats.inflight_avg, "TX average packets in-flight"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_coal_avg", CTLFLAG_RD, &priv->pstats.tx_coal_avg, 
"TX average coalesced completions"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_coal_avg", CTLFLAG_RD, &priv->pstats.rx_coal_avg, "RX average coalesced completions"); #endif SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tso_packets", CTLFLAG_RD, &priv->port_stats.tso_packets, "TSO packets sent"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "queue_stopped", CTLFLAG_RD, &priv->port_stats.queue_stopped, "Queue full"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "wake_queue", CTLFLAG_RD, &priv->port_stats.wake_queue, "Queue resumed after full"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_timeout", CTLFLAG_RD, &priv->port_stats.tx_timeout, "Transmit timeouts"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_alloc_failed", CTLFLAG_RD, &priv->port_stats.rx_alloc_failed, "RX failed to allocate mbuf"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_chksum_good", CTLFLAG_RD, &priv->port_stats.rx_chksum_good, "RX checksum offload success"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_chksum_none", CTLFLAG_RD, &priv->port_stats.rx_chksum_none, "RX without checksum offload"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_chksum_offload", CTLFLAG_RD, &priv->port_stats.tx_chksum_offload, "TX checksum offloads"); /* Could strdup the names and add in a loop. This is simpler. 
*/ SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &priv->pkstats.rx_bytes, "RX Bytes"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &priv->pkstats.rx_packets, "RX packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_multicast_packets", CTLFLAG_RD, &priv->pkstats.rx_multicast_packets, "RX Multicast Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_broadcast_packets", CTLFLAG_RD, &priv->pkstats.rx_broadcast_packets, "RX Broadcast Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_errors", CTLFLAG_RD, &priv->pkstats.rx_errors, "RX Errors"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_dropped", CTLFLAG_RD, &priv->pkstats.rx_dropped, "RX Dropped"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_length_errors", CTLFLAG_RD, &priv->pkstats.rx_length_errors, "RX Length Errors"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_over_errors", CTLFLAG_RD, &priv->pkstats.rx_over_errors, "RX Over Errors"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_crc_errors", CTLFLAG_RD, &priv->pkstats.rx_crc_errors, "RX CRC Errors"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_jabbers", CTLFLAG_RD, &priv->pkstats.rx_jabbers, "RX Jabbers"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_in_range_length_error", CTLFLAG_RD, &priv->pkstats.rx_in_range_length_error, "RX IN_Range Length Error"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_out_range_length_error", CTLFLAG_RD, &priv->pkstats.rx_out_range_length_error, "RX Out Range Length Error"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_lt_64_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_lt_64_bytes_packets, "RX Lt 64 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_127_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_127_bytes_packets, "RX 127 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_255_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_255_bytes_packets, "RX 255 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, 
"rx_511_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_511_bytes_packets, "RX 511 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1023_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1023_bytes_packets, "RX 1023 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1518_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1518_bytes_packets, "RX 1518 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1522_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1522_bytes_packets, "RX 1522 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1548_bytes_packets, "RX 1548 bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_gt_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_gt_1548_bytes_packets, "RX Greater Then 1548 bytes Packets"); struct mlx4_en_pkt_stats { unsigned long tx_packets; unsigned long tx_bytes; unsigned long tx_multicast_packets; unsigned long tx_broadcast_packets; unsigned long tx_errors; unsigned long tx_dropped; unsigned long tx_lt_64_bytes_packets; unsigned long tx_127_bytes_packets; unsigned long tx_255_bytes_packets; unsigned long tx_511_bytes_packets; unsigned long tx_1023_bytes_packets; unsigned long tx_1518_bytes_packets; unsigned long tx_1522_bytes_packets; unsigned long tx_1548_bytes_packets; unsigned long tx_gt_1548_bytes_packets; unsigned long rx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS]; unsigned long tx_prio[NUM_PRIORITIES][NUM_PRIORITY_STATS]; #define NUM_PKT_STATS 72 }; SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &priv->pkstats.tx_packets, "TX packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &priv->pkstats.tx_packets, "TX Bytes"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_multicast_packets", CTLFLAG_RD, &priv->pkstats.tx_multicast_packets, "TX Multicast Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_broadcast_packets", CTLFLAG_RD, &priv->pkstats.tx_broadcast_packets, "TX 
Broadcast Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_errors", CTLFLAG_RD, &priv->pkstats.tx_errors, "TX Errors"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_dropped", CTLFLAG_RD, &priv->pkstats.tx_dropped, "TX Dropped"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_lt_64_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_lt_64_bytes_packets, "TX Less Then 64 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_127_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_127_bytes_packets, "TX 127 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_255_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_255_bytes_packets, "TX 255 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_511_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_511_bytes_packets, "TX 511 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1023_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1023_bytes_packets, "TX 1023 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1518_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1518_bytes_packets, "TX 1518 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1522_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1522_bytes_packets, "TX 1522 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1548_bytes_packets, "TX 1548 Bytes Packets"); SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_gt_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_gt_1548_bytes_packets, "TX Greater Then 1548 Bytes Packets"); for (i = 0; i < priv->tx_ring_num; i++) { tx_ring = priv->tx_ring[i]; snprintf(namebuf, sizeof(namebuf), "tx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &tx_ring->packets, "TX packets"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &tx_ring->bytes, "TX 
bytes"); } for (i = 0; i < priv->rx_ring_num; i++) { rx_ring = priv->rx_ring[i]; snprintf(namebuf, sizeof(namebuf), "rx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &rx_ring->packets, "RX packets"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &rx_ring->bytes, "RX bytes"); SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error", CTLFLAG_RD, &rx_ring->errors, "RX soft errors"); } } Index: stable/9/sys/ofed/drivers/net/mlx4/en_rx.c =================================================================== --- stable/9/sys/ofed/drivers/net/mlx4/en_rx.c (revision 279733) +++ stable/9/sys/ofed/drivers/net/mlx4/en_rx.c (revision 279734) @@ -1,901 +1,901 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include "opt_inet.h" #include #include #include #include #include #include #include #ifdef CONFIG_NET_RX_BUSY_POLL #include #endif #include "mlx4_en.h" static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; int possible_frags; int i; /* Set size and memtype fields */ for (i = 0; i < priv->num_frags; i++) { rx_desc->data[i].byte_count = cpu_to_be32(priv->frag_info[i].frag_size); rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key); } /* If the number of used fragments does not fill up the ring stride, * * remaining (unused) fragments must be padded with null address/size * * and a special memory key */ possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; for (i = priv->num_frags; i < possible_frags; i++) { rx_desc->data[i].byte_count = 0; rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); rx_desc->data[i].addr = 0; } } static int mlx4_en_alloc_buf(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, struct mbuf **mb_list, int i) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; struct mbuf *mb; dma_addr_t dma; if (i == 0) mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, frag_info->frag_size); else mb = m_getjcl(M_NOWAIT, MT_DATA, 0, frag_info->frag_size); if (mb == NULL) { priv->port_stats.rx_alloc_failed++; return -ENOMEM; } dma = pci_map_single(mdev->pdev, mb->m_data, frag_info->frag_size, PCI_DMA_FROMDEVICE); rx_desc->data[i].addr = cpu_to_be64(dma); mb_list[i] = mb; return 0; } static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct 
mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); struct mbuf **mb_list = ring->rx_info + (index << priv->log_rx_info); int i; for (i = 0; i < priv->num_frags; i++) if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, i)) goto err; return 0; err: while (i--) m_free(mb_list[i]); return -ENOMEM; } static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) { *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); } static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_frag_info *frag_info; struct mlx4_en_dev *mdev = priv->mdev; struct mbuf **mb_list; struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride); dma_addr_t dma; int nr; mb_list = ring->rx_info + (index << priv->log_rx_info); for (nr = 0; nr < priv->num_frags; nr++) { en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); frag_info = &priv->frag_info[nr]; dma = be64_to_cpu(rx_desc->data[nr].addr); #if BITS_PER_LONG == 64 en_dbg(DRV, priv, "Unmaping buffer at dma:0x%lx\n", (u64) dma); #elif BITS_PER_LONG == 32 en_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma); #endif pci_unmap_single(mdev->pdev, dma, frag_info->frag_size, PCI_DMA_FROMDEVICE); m_free(mb_list[nr]); } } static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) { struct mlx4_en_rx_ring *ring; int ring_ind; int buf_ind; int new_size; int err; for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) { for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; err = mlx4_en_prepare_rx_desc(priv, ring, ring->actual_size); if (err) { if (ring->actual_size == 0) { en_err(priv, "Failed to allocate " "enough rx buffers\n"); return -ENOMEM; } else { new_size = rounddown_pow_of_two(ring->actual_size); en_warn(priv, "Only %d buffers allocated " "reducing ring size to %d\n", ring->actual_size, new_size); goto reduce_rings; } } ring->actual_size++; ring->prod++; } } return 0; reduce_rings: for (ring_ind = 
0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; while (ring->actual_size > new_size) { ring->actual_size--; ring->prod--; mlx4_en_free_rx_desc(priv, ring, ring->actual_size); } } return 0; } static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { int index; en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n", ring->cons, ring->prod); /* Unmap and free Rx buffers */ BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size); while (ring->cons != ring->prod) { index = ring->cons & ring->size_mask; en_dbg(DRV, priv, "Processing descriptor:%d\n", index); mlx4_en_free_rx_desc(priv, ring, index); ++ring->cons; } } #if MLX4_EN_MAX_RX_FRAGS == 3 static int frag_sizes[] = { FRAG_SZ0, FRAG_SZ1, FRAG_SZ2, }; #elif MLX4_EN_MAX_RX_FRAGS == 2 static int frag_sizes[] = { FRAG_SZ0, FRAG_SZ1, }; #else #error "Unknown MAX_RX_FRAGS" #endif void mlx4_en_calc_rx_buf(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); int eff_mtu = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; int buf_size = 0; int i, frag; for (i = 0, frag = 0; buf_size < eff_mtu; frag++, i++) { /* * Allocate small to large but only as much as is needed for * the tail. 
*/ while (i > 0 && eff_mtu - buf_size <= frag_sizes[i - 1]) i--; priv->frag_info[frag].frag_size = frag_sizes[i]; priv->frag_info[frag].frag_prefix_size = buf_size; buf_size += priv->frag_info[frag].frag_size; } priv->num_frags = frag; priv->rx_mb_size = eff_mtu; priv->log_rx_info = ROUNDUP_LOG2(priv->num_frags * sizeof(struct mbuf *)); en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d " "num_frags:%d):\n", eff_mtu, priv->num_frags); for (i = 0; i < priv->num_frags; i++) { en_dbg(DRV, priv, " frag:%d - size:%d prefix:%d\n", i, priv->frag_info[i].frag_size, priv->frag_info[i].frag_prefix_size); } } int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, int node) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring; int err = -ENOMEM; int tmp; ring = kzalloc(sizeof(struct mlx4_en_rx_ring), GFP_KERNEL); if (!ring) { en_err(priv, "Failed to allocate RX ring structure\n"); return -ENOMEM; } ring->prod = 0; ring->cons = 0; ring->size = size; ring->size_mask = size - 1; ring->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + DS_SIZE * MLX4_EN_MAX_RX_FRAGS); ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride + TXBB_SIZE; tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * sizeof(struct mbuf *)); ring->rx_info = kmalloc(tmp, GFP_KERNEL); if (!ring->rx_info) { err = -ENOMEM; goto err_ring; } en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n", ring->rx_info, tmp); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); if (err) goto err_info; err = mlx4_en_map_buffer(&ring->wqres.buf); if (err) { en_err(priv, "Failed to map RX buffer\n"); goto err_hwq; } ring->buf = ring->wqres.buf.direct.buf; *pring = ring; return 0; err_hwq: mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); err_info: vfree(ring->rx_info); err_ring: kfree(ring); return err; } int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) { struct 
mlx4_en_rx_ring *ring; int i; int ring_ind; int err; int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + DS_SIZE * priv->num_frags); for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; ring->prod = 0; ring->cons = 0; ring->actual_size = 0; ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn; ring->rx_alloc_order = priv->rx_alloc_order; ring->rx_alloc_size = priv->rx_alloc_size; ring->rx_buf_size = priv->rx_buf_size; ring->rx_mb_size = priv->rx_mb_size; ring->stride = stride; if (ring->stride <= TXBB_SIZE) ring->buf += TXBB_SIZE; ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride; memset(ring->buf, 0, ring->buf_size); mlx4_en_update_rx_prod_db(ring); /* Initialize all descriptors */ for (i = 0; i < ring->size; i++) mlx4_en_init_rx_desc(priv, ring, i); #ifdef INET /* Configure lro mngr */ if (priv->dev->if_capenable & IFCAP_LRO) { if (tcp_lro_init(&ring->lro)) priv->dev->if_capenable &= ~IFCAP_LRO; else ring->lro.ifp = priv->dev; } #endif } err = mlx4_en_fill_rx_buffers(priv); if (err) goto err_buffers; for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; ring->size_mask = ring->actual_size - 1; mlx4_en_update_rx_prod_db(ring); } return 0; err_buffers: for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]); ring_ind = priv->rx_ring_num - 1; while (ring_ind >= 0) { ring = priv->rx_ring[ring_ind]; if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; ring_ind--; } return err; } void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring = *pring; mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); vfree(ring->rx_info); kfree(ring); *pring = NULL; #ifdef CONFIG_RFS_ACCEL mlx4_en_cleanup_filters(priv, ring); #endif } 
void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { #ifdef INET tcp_lro_free(&ring->lro); #endif mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; } static void validate_loopback(struct mlx4_en_priv *priv, struct mbuf *mb) { int i; int offset = ETHER_HDR_LEN; for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { if (*(mb->m_data + offset) != (unsigned char) (i & 0xff)) goto out_loopback; } /* Loopback found */ priv->loopback_ok = 1; out_loopback: m_freem(mb); } static inline int invalid_cqe(struct mlx4_en_priv *priv, struct mlx4_cqe *cqe) { /* Drop packet on bad receive or bad checksum */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { en_err(priv, "CQE completed in error - vendor syndrom:%d syndrom:%d\n", ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome, ((struct mlx4_err_cqe *)cqe)->syndrome); return 1; } if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) { en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n"); return 1; } return 0; } /* Unmap a completed descriptor and free unused pages */ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, struct mbuf **mb_list, int length) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_frag_info *frag_info; dma_addr_t dma; struct mbuf *mb; int nr; mb = mb_list[0]; mb->m_pkthdr.len = length; /* Collect used fragments while replacing them in the HW descirptors */ for (nr = 0; nr < priv->num_frags; nr++) { frag_info = &priv->frag_info[nr]; if (length <= frag_info->frag_prefix_size) break; if (nr) mb->m_next = mb_list[nr]; mb = mb_list[nr]; mb->m_len = frag_info->frag_size; dma = be64_to_cpu(rx_desc->data[nr].addr); /* Allocate a replacement page */ if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, nr)) goto fail; /* Unmap buffer */ - pci_unmap_single(mdev->pdev, dma, frag_info[nr].frag_size, + pci_unmap_single(mdev->pdev, dma, frag_info->frag_size, 
PCI_DMA_FROMDEVICE); } /* Adjust size of last fragment to match actual length */ mb->m_len = length - priv->frag_info[nr - 1].frag_prefix_size; mb->m_next = NULL; return 0; fail: /* Drop all accumulated fragments (which have already been replaced in * the descriptor) of this packet; remaining fragments are reused... */ while (nr > 0) { nr--; m_free(mb_list[nr]); } return -ENOMEM; } static struct mbuf *mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, struct mbuf **mb_list, unsigned int length) { struct mbuf *mb; mb = mb_list[0]; /* Move relevant fragments to mb */ if (unlikely(mlx4_en_complete_rx_desc(priv, rx_desc, mb_list, length))) return NULL; return mb; } /* For cpu arch with cache line of 64B the performance is better when cqe size==64B * To enlarge cqe size from 32B to 64B --> 32B of garbage (i.e. 0xccccccc) * was added in the beginning of each cqe (the real data is in the corresponding 32B). * The following calc ensures that when factor==1, it means we are alligned to 64B * and we get the real cqe data*/ #define CQE_FACTOR_INDEX(index, factor) ((index << factor) + factor) int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; struct mbuf **mb_list; struct mlx4_en_rx_desc *rx_desc; struct mbuf *mb; struct mlx4_cq *mcq = &cq->mcq; struct mlx4_cqe *buf = cq->buf; #ifdef INET struct lro_entry *queued; #endif int index; unsigned int length; int polled = 0; u32 cons_index = mcq->cons_index; u32 size_mask = ring->size_mask; int size = cq->size; int factor = priv->cqe_factor; if (!priv->port_up) return 0; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deducted from the CQE index instead of * reading 'cqe->index' */ index = cons_index & size_mask; cqe = &buf[CQE_FACTOR_INDEX(index, factor)]; /* Process all completed CQEs */ while 
(XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size)) { mb_list = ring->rx_info + (index << priv->log_rx_info); rx_desc = ring->buf + (index << ring->log_stride); /* * make sure we read the CQE after we read the ownership bit */ rmb(); if (invalid_cqe(priv, cqe)) { goto next; } /* * Packet is OK - process it. */ length = be32_to_cpu(cqe->byte_cnt); length -= ring->fcs_del; mb = mlx4_en_rx_mb(priv, rx_desc, mb_list, length); if (!mb) { ring->errors++; goto next; } ring->bytes += length; ring->packets++; if (unlikely(priv->validate_loopback)) { validate_loopback(priv, mb); goto next; } mb->m_pkthdr.flowid = cq->ring; mb->m_flags |= M_FLOWID; mb->m_pkthdr.rcvif = dev; if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) { mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->sl_vid); mb->m_flags |= M_VLANTAG; } if (likely(dev->if_capabilities & IFCAP_RXCSUM) && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && (cqe->checksum == cpu_to_be16(0xffff))) { priv->port_stats.rx_chksum_good++; mb->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mb->m_pkthdr.csum_data = htons(0xffff); /* This packet is eligible for LRO if it is: * - DIX Ethernet (type interpretation) * - TCP/IP (v4) * - without IP options * - not an IP fragment */ #ifdef INET if (mlx4_en_can_lro(cqe->status) && (dev->if_capenable & IFCAP_LRO)) { if (ring->lro.lro_cnt != 0 && tcp_lro_rx(&ring->lro, mb, 0) == 0) goto next; } #endif /* LRO not possible, complete processing here */ INC_PERF_COUNTER(priv->pstats.lro_misses); } else { mb->m_pkthdr.csum_flags = 0; priv->port_stats.rx_chksum_none++; } /* Push it up the stack */ dev->if_input(dev, mb); next: ++cons_index; index = cons_index & size_mask; cqe = &buf[CQE_FACTOR_INDEX(index, factor)]; if (++polled == budget) goto out; } /* Flush all pending IP reassembly sessions */ out: #ifdef INET while ((queued = SLIST_FIRST(&ring->lro.lro_active)) != NULL) { SLIST_REMOVE_HEAD(&ring->lro.lro_active, next); 
tcp_lro_flush(&ring->lro, queued); } #endif AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mcq->cons_index = cons_index; mlx4_cq_set_ci(mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ ring->cons = mcq->cons_index; ring->prod += polled; /* Polled descriptors were realocated in place */ mlx4_en_update_rx_prod_db(ring); return polled; } /* Rx CQ polling - called by NAPI */ static int mlx4_en_poll_rx_cq(struct mlx4_en_cq *cq, int budget) { struct net_device *dev = cq->dev; int done; done = mlx4_en_process_rx_cq(dev, cq, budget); cq->tot_rx += done; return done; } void mlx4_en_rx_irq(struct mlx4_cq *mcq) { struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); int done; // Shoot one within the irq context // Because there is no NAPI in freeBSD done = mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET); if (priv->port_up && (done == MLX4_EN_RX_BUDGET) ) { taskqueue_enqueue(cq->tq, &cq->cq_task); } else { mlx4_en_arm_cq(priv, cq); } } void mlx4_en_rx_que(void *context, int pending) { struct mlx4_en_cq *cq; cq = context; while (mlx4_en_poll_rx_cq(cq, MLX4_EN_RX_BUDGET) == MLX4_EN_RX_BUDGET); mlx4_en_arm_cq(cq->dev->if_softc, cq); } /* RSS related functions */ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, struct mlx4_en_rx_ring *ring, enum mlx4_qp_state *state, struct mlx4_qp *qp) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_qp_context *context; int err = 0; context = kmalloc(sizeof *context , GFP_KERNEL); if (!context) { en_err(priv, "Failed to allocate qp context\n"); return -ENOMEM; } err = mlx4_qp_alloc(mdev->dev, qpn, qp); if (err) { en_err(priv, "Failed to allocate qp #%x\n", qpn); goto out; } qp->event = mlx4_en_sqp_event; memset(context, 0, sizeof *context); mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0, qpn, ring->cqn, -1, context); context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); /* Cancel FCS removal if FW allows */ if 
(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) { context->param3 |= cpu_to_be32(1 << 29); ring->fcs_del = ETH_FCS_LEN; } else ring->fcs_del = 0; err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state); if (err) { mlx4_qp_remove(mdev->dev, qp); mlx4_qp_free(mdev->dev, qp); } mlx4_en_update_rx_prod_db(ring); out: kfree(context); return err; } int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) { int err; u32 qpn; err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, 0); if (err) { en_err(priv, "Failed reserving drop qpn\n"); return err; } err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); if (err) { en_err(priv, "Failed allocating drop qp\n"); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); return err; } return 0; } void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv) { u32 qpn; qpn = priv->drop_qp.qpn; mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp); mlx4_qp_free(priv->mdev->dev, &priv->drop_qp); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); } /* Allocate rx qp's and configure them according to rss map */ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rss_map *rss_map = &priv->rss_map; struct mlx4_qp_context context; struct mlx4_rss_context *rss_context; int rss_rings; void *ptr; u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 | MLX4_RSS_TCP_IPV6); int i; int err = 0; int good_qps = 0; static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC, 0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD, 0x593D56D9, 0xF3253C06, 0x2ADC1FFC}; en_dbg(DRV, priv, "Configuring rss steering\n"); err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, priv->rx_ring_num, &rss_map->base_qpn, 0); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); return err; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->qpn = rss_map->base_qpn + i; err = mlx4_en_config_rss_qp(priv, priv->rx_ring[i]->qpn, priv->rx_ring[i], 
&rss_map->state[i], &rss_map->qps[i]); if (err) goto rss_err; ++good_qps; } /* Configure RSS indirection qp */ err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); goto rss_err; } rss_map->indir_qp.event = mlx4_en_sqp_event; mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, priv->rx_ring[0]->cqn, -1, &context); if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num) rss_rings = priv->rx_ring_num; else rss_rings = priv->prof->rss_rings; ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path) + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH; rss_context = ptr; rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 | (rss_map->base_qpn)); rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); if (priv->mdev->profile.udp_rss) { rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6; rss_context->base_qpn_udp = rss_context->default_qpn; } rss_context->flags = rss_mask; rss_context->hash_fn = MLX4_RSS_HASH_TOP; for (i = 0; i < 10; i++) rss_context->rss_key[i] = cpu_to_be32(rsskey[i]); err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, &rss_map->indir_qp, &rss_map->indir_state); if (err) goto indir_err; return 0; indir_err: mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); mlx4_qp_free(mdev->dev, &rss_map->indir_qp); rss_err: for (i = 0; i < good_qps; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); mlx4_qp_free(mdev->dev, &rss_map->qps[i]); } mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); return err; } void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rss_map *rss_map = &priv->rss_map; int i; mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state, 
MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp); mlx4_qp_remove(mdev->dev, &rss_map->indir_qp); mlx4_qp_free(mdev->dev, &rss_map->indir_qp); for (i = 0; i < priv->rx_ring_num; i++) { mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i], MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]); mlx4_qp_remove(mdev->dev, &rss_map->qps[i]); mlx4_qp_free(mdev->dev, &rss_map->qps[i]); } mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num); } Index: stable/9/sys/ofed/drivers/net/mlx4/en_tx.c =================================================================== --- stable/9/sys/ofed/drivers/net/mlx4/en_tx.c (revision 279733) +++ stable/9/sys/ofed/drivers/net/mlx4/en_tx.c (revision 279734) @@ -1,1115 +1,1115 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mlx4_en.h" #include "utils.h" enum { MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */ MAX_BF = 256, MIN_PKT_LEN = 17, }; static int inline_thold __read_mostly = MAX_INLINE; module_param_named(inline_thold, inline_thold, uint, 0444); MODULE_PARM_DESC(inline_thold, "threshold for using inline data");

/*
 * Allocate one TX ring and everything it owns: the ring structure, its two
 * mutexes, the software buf_ring, the tx_info array, the bounce buffer, the
 * hardware queue buffer (allocated and mapped) and a QP.  A BlueFlame
 * register is requested as well; if that fails the ring falls back to the
 * device's private UAR and continues without BlueFlame.
 *
 * On success 0 is returned and *pring points at the new ring.  On failure
 * everything acquired so far is released (in reverse order of acquisition),
 * *pring is left untouched and the error is returned: -ENOMEM for the
 * allocations done here, otherwise the value returned by the failing helper.
 */
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_tx_ring **pring, u32 size,
			   u16 stride, int node, int queue_idx)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_ring *ring;
	int tmp;
	int err;

	/* Prefer memory on the requested node, fall back to any node */
	ring = kzalloc_node(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL, node);
	if (!ring) {
		ring = kzalloc(sizeof(struct mlx4_en_tx_ring), GFP_KERNEL);
		if (!ring) {
			en_err(priv, "Failed allocating TX ring\n");
			return -ENOMEM;
		}
	}

	ring->size = size;
	ring->size_mask = size - 1;
	ring->stride = stride;
	ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;
	ring->inline_thold = min(inline_thold, MAX_INLINE);
	mtx_init(&ring->tx_lock.m, "mlx4 tx", NULL, MTX_DEF);
	mtx_init(&ring->comp_lock.m, "mlx4 comp", NULL, MTX_DEF);

	/* Allocate the buf ring */
	ring->br = buf_ring_alloc(MLX4_EN_DEF_TX_QUEUE_SIZE, M_DEVBUF,
	    M_WAITOK, &ring->tx_lock.m);
	if (ring->br == NULL) {
		en_err(priv, "Failed allocating tx_info ring\n");
		/* Unwind the mutexes and the ring instead of leaking them */
		err = -ENOMEM;
		goto err_free_ring;
	}

	tmp = size * sizeof(struct mlx4_en_tx_info);
	ring->tx_info = vmalloc_node(tmp, node);
	if (!ring->tx_info) {
		ring->tx_info = vmalloc(tmp);
		if (!ring->tx_info) {
			err = -ENOMEM;
			goto err_ring;
		}
	}

	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
	       ring->tx_info, tmp);

	ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node);
	if (!ring->bounce_buf) {
		ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
		if (!ring->bounce_buf) {
			err = -ENOMEM;
			goto err_info;
		}
	}
	ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);

	/* Allocate HW buffers on provided NUMA node */
	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
				 2 * PAGE_SIZE);
	if (err) {
		en_err(priv, "Failed allocating hwq resources\n");
		goto err_bounce;
	}

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		en_err(priv, "Failed to map TX buffer\n");
		goto err_hwq_res;
	}

	ring->buf = ring->wqres.buf.direct.buf;

	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d "
	       "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size,
	       ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);

	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
				    MLX4_RESERVE_BF_QP);
	if (err) {
		en_err(priv, "failed reserving qp for TX ring\n");
		goto err_map;
	}

	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp);
	if (err) {
		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
		goto err_reserve;
	}
	ring->qp.event = mlx4_en_sqp_event;

	/* BlueFlame is optional: without it, use the device's private UAR */
	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
	if (err) {
		en_dbg(DRV, priv, "working without blueflame (%d)", err);
		ring->bf.uar = &mdev->priv_uar;
		ring->bf.uar->map = mdev->uar_map;
		ring->bf_enabled = false;
	} else
		ring->bf_enabled = true;
	ring->queue_index = queue_idx;
	if (queue_idx < priv->num_tx_rings_p_up)
		CPU_SET(queue_idx, &ring->affinity_mask);

	*pring = ring;
	return 0;

err_reserve:
	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
err_map:
	mlx4_en_unmap_buffer(&ring->wqres.buf);
err_hwq_res:
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_bounce:
	kfree(ring->bounce_buf);
err_info:
	vfree(ring->tx_info);
err_ring:
	buf_ring_free(ring->br, M_DEVBUF);
err_free_ring:
	/* Mirror mlx4_en_destroy_tx_ring(): mutexes go before the ring */
	mtx_destroy(&ring->tx_lock.m);
	mtx_destroy(&ring->comp_lock.m);
	kfree(ring);
	return err;
}

void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring = *pring; en_dbg(DRV, priv,
"Destroying tx ring, qpn: %d\n", ring->qpn); buf_ring_free(ring->br, M_DEVBUF); if (ring->bf_enabled) mlx4_bf_free(mdev->dev, &ring->bf); mlx4_qp_remove(mdev->dev, &ring->qp); mlx4_qp_free(mdev->dev, &ring->qp); mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); kfree(ring->bounce_buf); vfree(ring->tx_info); mtx_destroy(&ring->tx_lock.m); mtx_destroy(&ring->comp_lock.m); kfree(ring); *pring = NULL; } int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int cq, int user_prio) { struct mlx4_en_dev *mdev = priv->mdev; int err; ring->cqn = cq; ring->prod = 0; ring->cons = 0xffffffff; ring->last_nr_txbb = 1; ring->poll_cnt = 0; ring->blocked = 0; memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info)); memset(ring->buf, 0, ring->buf_size); ring->qp_state = MLX4_QP_STATE_RST; ring->doorbell_qpn = ring->qp.qpn << 8; mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn, ring->cqn, user_prio, &ring->context); if (ring->bf_enabled) ring->context.usr_page = cpu_to_be32(ring->bf.uar->index); err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context, &ring->qp, &ring->qp_state); return err; } void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring) { struct mlx4_en_dev *mdev = priv->mdev; mlx4_qp_modify(mdev->dev, NULL, ring->qp_state, MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); } static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int index, u8 owner) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; void *end = ring->buf + ring->buf_size; __be32 *ptr = (__be32 *)tx_desc; __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); int i; /* Optimize the common case when there are no wraparounds */ if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= 
end)) /* Stamp the freed descriptor */ for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { *ptr = stamp; ptr += STAMP_DWORDS; } else /* Stamp the freed descriptor */ for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { *ptr = stamp; ptr += STAMP_DWORDS; if ((void *)ptr >= end) { ptr = ring->buf; stamp ^= cpu_to_be32(0x80000000); } } } static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int index, u8 owner, u64 timestamp) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; struct mbuf *mb = tx_info->mb; void *end = ring->buf + ring->buf_size; int frags = tx_info->nr_segs;; int i; /* Optimize the common case when there are no wraparounds */ if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { if (!tx_info->inl) { if (tx_info->linear) { dma_unmap_single(priv->ddev, (dma_addr_t) be64_to_cpu(data->addr), be32_to_cpu(data->byte_count), PCI_DMA_TODEVICE); ++data; } for (i = 0; i < frags; i++) { pci_unmap_single(mdev->pdev, (dma_addr_t) be64_to_cpu(data[i].addr), data[i].byte_count, PCI_DMA_TODEVICE); } } } else { if (!tx_info->inl) { if ((void *) data >= end) { data = ring->buf + ((void *)data - end); } if (tx_info->linear) { dma_unmap_single(priv->ddev, (dma_addr_t) be64_to_cpu(data->addr), be32_to_cpu(data->byte_count), PCI_DMA_TODEVICE); ++data; } for (i = 0; i < frags; i++) { /* Check for wraparound before unmapping */ if ((void *) data >= end) data = ring->buf; pci_unmap_single(mdev->pdev, (dma_addr_t) be64_to_cpu(data->addr), data->byte_count, PCI_DMA_TODEVICE); ++data; } } } /* Send a copy of the frame to the BPF listener */ if (priv->dev && priv->dev->if_bpf) ETHER_BPF_MTAP(priv->dev, mb); m_freem(mb); return tx_info->nr_txbb; } int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring 
*ring) { struct mlx4_en_priv *priv = netdev_priv(dev); int cnt = 0; /* Skip last polled descriptor */ ring->cons += ring->last_nr_txbb; en_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n", ring->cons, ring->prod); if ((u32) (ring->prod - ring->cons) > ring->size) { en_warn(priv, "Tx consumer passed producer!\n"); return 0; } while (ring->cons != ring->prod) { ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, ring->cons & ring->size_mask, !!(ring->cons & ring->size), 0); ring->cons += ring->last_nr_txbb; cnt++; } if (cnt) en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt); return cnt; } static int mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; struct mlx4_cqe *cqe; u16 index; u16 new_index, ring_index, stamp_index; u32 txbbs_skipped = 0; u32 txbbs_stamp = 0; u32 cons_index = mcq->cons_index; int size = cq->size; u32 size_mask = ring->size_mask; struct mlx4_cqe *buf = cq->buf; u32 packets = 0; u32 bytes = 0; int factor = priv->cqe_factor; u64 timestamp = 0; int done = 0; if (!priv->port_up) return 0; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; ring_index = ring->cons & size_mask; stamp_index = ring_index; /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size)) { /* * make sure we read the CQE after we read the * ownership bit */ rmb(); if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { en_err(priv, "CQE completed in error - vendor syndrom: 0x%x syndrom: 0x%x\n", ((struct mlx4_err_cqe *)cqe)-> vendor_err_syndrome, ((struct mlx4_err_cqe *)cqe)->syndrome); } /* Skip over last polled CQE */ new_index = be16_to_cpu(cqe->wqe_index) & size_mask; do { txbbs_skipped += ring->last_nr_txbb; ring_index = (ring_index + ring->last_nr_txbb) & size_mask; /* free next descriptor */ 
ring->last_nr_txbb = mlx4_en_free_tx_desc( priv, ring, ring_index, !!((ring->cons + txbbs_skipped) & ring->size), timestamp); mlx4_en_stamp_wqe(priv, ring, stamp_index, !!((ring->cons + txbbs_stamp) & ring->size)); stamp_index = ring_index; txbbs_stamp = txbbs_skipped; packets++; bytes += ring->tx_info[ring_index].nr_bytes; } while (ring_index != new_index); ++cons_index; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; } /* * To prevent CQ overflow we first update CQ consumer and only then * the ring consumer. */ mcq->cons_index = cons_index; mlx4_cq_set_ci(mcq); wmb(); ring->cons += txbbs_skipped; /* Wakeup Tx queue if it was stopped and ring is not full */ if (unlikely(ring->blocked) && (ring->prod - ring->cons) <= ring->full_size) { ring->blocked = 0; if (atomic_fetchadd_int(&priv->blocked, -1) == 1) atomic_clear_int(&dev->if_drv_flags ,IFF_DRV_OACTIVE); ring->wake_queue++; priv->port_stats.wake_queue++; } return done; } void mlx4_en_tx_irq(struct mlx4_cq *mcq) { struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; if (!spin_trylock(&ring->comp_lock)) return; mlx4_en_process_tx_cq(cq->dev, cq); mod_timer(&cq->timer, jiffies + 1); spin_unlock(&ring->comp_lock); } void mlx4_en_poll_tx_cq(unsigned long data) { struct mlx4_en_cq *cq = (struct mlx4_en_cq *) data; struct mlx4_en_priv *priv = netdev_priv(cq->dev); struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring]; u32 inflight; INC_PERF_COUNTER(priv->pstats.tx_poll); if (!spin_trylock(&ring->comp_lock)) { mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); return; } mlx4_en_process_tx_cq(cq->dev, cq); inflight = (u32) (ring->prod - ring->cons - ring->last_nr_txbb); /* If there are still packets in flight and the timer has not already * been scheduled by the Tx routine then schedule it here to guarantee * completion processing of these packets */ if (inflight && 
priv->port_up) mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); spin_unlock(&ring->comp_lock); } static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, u32 index, unsigned int desc_size) { u32 copy = (ring->size - index) * TXBB_SIZE; int i; for (i = desc_size - copy - 4; i >= 0; i -= 4) { if ((i & (TXBB_SIZE - 1)) == 0) wmb(); *((u32 *) (ring->buf + i)) = *((u32 *) (ring->bounce_buf + copy + i)); } for (i = copy - 4; i >= 4 ; i -= 4) { if ((i & (TXBB_SIZE - 1)) == 0) wmb(); *((u32 *) (ring->buf + index * TXBB_SIZE + i)) = *((u32 *) (ring->bounce_buf + i)); } /* Return real descriptor location */ return ring->buf + index * TXBB_SIZE; } static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind) { struct mlx4_en_cq *cq = priv->tx_cq[tx_ind]; struct mlx4_en_tx_ring *ring = priv->tx_ring[tx_ind]; /* If we don't have a pending timer, set one up to catch our recent post in case the interface becomes idle */ if (!timer_pending(&cq->timer)) mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); /* Poll the CQ every mlx4_en_TX_MODER_POLL packets */ if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0) if (spin_trylock(&ring->comp_lock)) { mlx4_en_process_tx_cq(priv->dev, cq); spin_unlock(&ring->comp_lock); } } static int is_inline(struct mbuf *mb, int thold) { if (thold && mb->m_pkthdr.len <= thold && (mb->m_pkthdr.csum_flags & CSUM_TSO) == 0) return 1; return 0; } static int inline_size(struct mbuf *mb) { int len; len = mb->m_pkthdr.len; if (len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg) <= MLX4_INLINE_ALIGN) return ALIGN(len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg), 16); else return ALIGN(len + CTRL_SIZE + 2 * sizeof(struct mlx4_wqe_inline_seg), 16); } static int get_head_size(struct mbuf *mb) { struct ether_vlan_header *eh; struct tcphdr *th; struct ip *ip; int ip_hlen, tcp_hlen; struct ip6_hdr *ip6; uint16_t eth_type; int eth_hdr_len; eh = mtod(mb, struct 
ether_vlan_header *); if (mb->m_len < ETHER_HDR_LEN) return (0); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { eth_type = ntohs(eh->evl_proto); eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { eth_type = ntohs(eh->evl_encap_proto); eth_hdr_len = ETHER_HDR_LEN; } if (mb->m_len < eth_hdr_len) return (0); switch (eth_type) { case ETHERTYPE_IP: ip = (struct ip *)(mb->m_data + eth_hdr_len); if (mb->m_len < eth_hdr_len + sizeof(*ip)) return (0); if (ip->ip_p != IPPROTO_TCP) return (0); ip_hlen = ip->ip_hl << 2; eth_hdr_len += ip_hlen; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len); if (mb->m_len < eth_hdr_len + sizeof(*ip6)) return (0); if (ip6->ip6_nxt != IPPROTO_TCP) return (0); eth_hdr_len += sizeof(*ip6); break; default: return (0); } if (mb->m_len < eth_hdr_len + sizeof(*th)) return (0); th = (struct tcphdr *)(mb->m_data + eth_hdr_len); tcp_hlen = th->th_off << 2; eth_hdr_len += tcp_hlen; if (mb->m_len < eth_hdr_len) return (0); return (eth_hdr_len); } static int get_real_size(struct mbuf *mb, struct net_device *dev, int *p_n_segs, int *lso_header_size, int inl) { struct mbuf *m; int nr_segs = 0; for (m = mb; m != NULL; m = m->m_next) if (m->m_len) nr_segs++; if (mb->m_pkthdr.csum_flags & CSUM_TSO) { *lso_header_size = get_head_size(mb); if (*lso_header_size) { if (mb->m_len == *lso_header_size) nr_segs--; *p_n_segs = nr_segs; return CTRL_SIZE + nr_segs * DS_SIZE + ALIGN(*lso_header_size + 4, DS_SIZE); } } else *lso_header_size = 0; *p_n_segs = nr_segs; if (inl) return inline_size(mb); return (CTRL_SIZE + nr_segs * DS_SIZE); } static struct mbuf *mb_copy(struct mbuf *mb, int *offp, char *data, int len) { int bytes; int off; off = *offp; while (len) { bytes = min(mb->m_len - off, len); if (bytes) memcpy(data, mb->m_data + off, bytes); len -= bytes; data += bytes; off += bytes; if (off == mb->m_len) { off = 0; mb = mb->m_next; } } *offp = off; return (mb); } static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, 
struct mbuf *mb, int real_size, u16 *vlan_tag, int tx_ind) { struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; int len; int off; off = 0; len = mb->m_pkthdr.len; if (len <= spc) { inl->byte_count = cpu_to_be32(1 << 31 | (max_t(typeof(len), len, MIN_PKT_LEN))); mb_copy(mb, &off, (void *)(inl + 1), len); if (len < MIN_PKT_LEN) memset(((void *)(inl + 1)) + len, 0, MIN_PKT_LEN - len); } else { inl->byte_count = cpu_to_be32(1 << 31 | spc); mb = mb_copy(mb, &off, (void *)(inl + 1), spc); inl = (void *) (inl + 1) + spc; mb_copy(mb, &off, (void *)(inl + 1), len - spc); wmb(); inl->byte_count = cpu_to_be32(1 << 31 | (len - spc)); } tx_desc->ctrl.vlan_tag = cpu_to_be16(*vlan_tag); tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!(*vlan_tag); tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f; } static unsigned long hashrandom; static void hashrandom_init(void *arg) { hashrandom = random(); } SYSINIT(hashrandom_init, SI_SUB_KLD, SI_ORDER_SECOND, &hashrandom_init, NULL); u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb) { struct mlx4_en_priv *priv = netdev_priv(dev); u32 rings_p_up = priv->num_tx_rings_p_up; - u32 vlan_tag = 0; u32 up = 0; u32 queue_index; +#if (MLX4_EN_NUM_UP > 1) /* Obtain VLAN information if present */ if (mb->m_flags & M_VLANTAG) { - vlan_tag = mb->m_pkthdr.ether_vtag; - up = (vlan_tag >> 13); + u32 vlan_tag = mb->m_pkthdr.ether_vtag; + up = (vlan_tag >> 13) % MLX4_EN_NUM_UP; } - +#endif /* hash mbuf */ queue_index = mlx4_en_hashmbuf(MLX4_F_HASHL3 | MLX4_F_HASHL4, mb, hashrandom); return ((queue_index % rings_p_up) + (up * rings_p_up)); } static void mlx4_bf_copy(void __iomem *dst, unsigned long *src, unsigned bytecnt) { __iowrite64_copy(dst, src, bytecnt / 8); } static u64 mlx4_en_mac_to_u64(u8 *addr) { u64 mac = 0; int i; for (i = 0; i < ETHER_ADDR_LEN; i++) { mac <<= 8; mac |= addr[i]; } return mac; } static int mlx4_en_xmit(struct net_device *dev, int tx_ind, struct mbuf **mbp) { 
struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring; struct mlx4_en_cq *cq; struct mlx4_en_tx_desc *tx_desc; struct mlx4_wqe_data_seg *data; struct mlx4_en_tx_info *tx_info; struct mbuf *m; int nr_txbb; int nr_segs; int desc_size; int real_size; dma_addr_t dma; u32 index, bf_index, ring_size; __be32 op_own; u16 vlan_tag = 0; int i; int lso_header_size; bool bounce = false; bool inl = false; struct mbuf *mb; mb = *mbp; int defrag = 1; if (!priv->port_up) goto tx_drop; ring = priv->tx_ring[tx_ind]; ring_size = ring->size; inl = is_inline(mb, ring->inline_thold); retry: real_size = get_real_size(mb, dev, &nr_segs, &lso_header_size, inl); if (unlikely(!real_size)) goto tx_drop; /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); nr_txbb = desc_size / TXBB_SIZE; if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { if (defrag) { mb = m_defrag(*mbp, M_NOWAIT); if (mb == NULL) { mb = *mbp; goto tx_drop; } *mbp = mb; defrag = 0; goto retry; } en_warn(priv, "Oversized header or SG list\n"); goto tx_drop; } /* Obtain VLAN information if present */ if (mb->m_flags & M_VLANTAG) { vlan_tag = mb->m_pkthdr.ether_vtag; } /* Check available TXBBs and 2K spare for prefetch * Even if netif_tx_stop_queue() will be called * driver will send current packet to ensure * that at least one completion will be issued after * stopping the queue */ if (unlikely((int)(ring->prod - ring->cons) > ring->full_size)) { /* every full Tx ring stops queue */ if (ring->blocked == 0) atomic_add_int(&priv->blocked, 1); /* Set HW-queue-is-full flag */ atomic_set_int(&dev->if_drv_flags, IFF_DRV_OACTIVE); ring->blocked = 1; priv->port_stats.queue_stopped++; ring->queue_stopped++; /* Use interrupts to find out when queue opened */ cq = priv->tx_cq[tx_ind]; mlx4_en_arm_cq(priv, cq); return EBUSY; } /* Track current inflight packets for performance analysis */ AVG_PERF_COUNTER(priv->pstats.inflight_avg, (u32) (ring->prod - ring->cons - 
1)); /* Packet is good - grab an index and transmit it */ index = ring->prod & ring->size_mask; bf_index = ring->prod; /* See if we have enough space for whole descriptor TXBB for setting * SW ownership on next descriptor; if not, use a bounce buffer. */ if (likely(index + nr_txbb <= ring_size)) tx_desc = ring->buf + index * TXBB_SIZE; else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; } /* Save mb in tx_info ring */ tx_info = &ring->tx_info[index]; tx_info->mb = mb; tx_info->nr_txbb = nr_txbb; tx_info->nr_segs = nr_segs; if (lso_header_size) { memcpy(tx_desc->lso.header, mb->m_data, lso_header_size); data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4, DS_SIZE)); /* lso header is part of m_data. * need to omit when mapping DMA */ mb->m_data += lso_header_size; mb->m_len -= lso_header_size; } else data = &tx_desc->data; /* valid only for none inline segments */ tx_info->data_offset = (void *)data - (void *)tx_desc; if (inl) { tx_info->inl = 1; } else { for (i = 0, m = mb; i < nr_segs; i++, m = m->m_next) { if (m->m_len == 0) { i--; continue; } dma = pci_map_single(mdev->dev->pdev, m->m_data, m->m_len, PCI_DMA_TODEVICE); data->addr = cpu_to_be64(dma); data->lkey = cpu_to_be32(mdev->mr.key); wmb(); data->byte_count = cpu_to_be32(m->m_len); data++; } if (lso_header_size) { mb->m_data -= lso_header_size; mb->m_len += lso_header_size; } tx_info->inl = 0; } /* Prepare ctrl segement apart opcode+ownership, which depends on * whether LSO is used */ tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!vlan_tag; tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f; tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO | CSUM_TCP | CSUM_UDP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6)) { if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM); if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | 
CSUM_TCP_IPV6 | CSUM_TSO)) tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_TCP_UDP_CSUM); priv->port_stats.tx_chksum_offload++; ring->tx_csum++; } if (unlikely(priv->validate_loopback)) { /* Copy dst mac address to wqe */ struct ether_header *ethh; u64 mac; u32 mac_l, mac_h; ethh = mtod(mb, struct ether_header *); mac = mlx4_en_mac_to_u64(ethh->ether_dhost); if (mac) { mac_h = (u32) ((mac & 0xffff00000000ULL) >> 16); mac_l = (u32) (mac & 0xffffffff); tx_desc->ctrl.srcrb_flags |= cpu_to_be32(mac_h); tx_desc->ctrl.imm = cpu_to_be32(mac_l); } } /* Handle LSO (TSO) packets */ if (lso_header_size) { int segsz; /* Mark opcode as LSO */ op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) | ((ring->prod & ring_size) ? cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); /* Fill in the LSO prefix */ tx_desc->lso.mss_hdr_size = cpu_to_be32( mb->m_pkthdr.tso_segsz << 16 | lso_header_size); priv->port_stats.tso_packets++; segsz = mb->m_pkthdr.tso_segsz; i = ((mb->m_pkthdr.len - lso_header_size + segsz - 1) / segsz); tx_info->nr_bytes= mb->m_pkthdr.len + (i - 1) * lso_header_size; ring->packets += i; } else { /* Normal (Non LSO) packet */ op_own = cpu_to_be32(MLX4_OPCODE_SEND) | ((ring->prod & ring_size) ? 
cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); tx_info->nr_bytes = max(mb->m_pkthdr.len, (unsigned int)ETHER_MIN_LEN - ETHER_CRC_LEN); ring->packets++; } ring->bytes += tx_info->nr_bytes; AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, mb->m_pkthdr.len); if (tx_info->inl) { build_inline_wqe(tx_desc, mb, real_size, &vlan_tag, tx_ind); tx_info->inl = 1; } ring->prod += nr_txbb; /* If we used a bounce buffer then copy descriptor back into place */ if (unlikely(bounce)) tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size); if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) { *(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn); op_own |= htonl((bf_index & 0xffff) << 8); /* Ensure new descirptor hits memory * before setting ownership of this descriptor to HW */ wmb(); tx_desc->ctrl.owner_opcode = op_own; wmb(); mlx4_bf_copy(ring->bf.reg + ring->bf.offset, (unsigned long *) &tx_desc->ctrl, desc_size); wmb(); ring->bf.offset ^= ring->bf.buf_size; } else { /* Ensure new descirptor hits memory * before setting ownership of this descriptor to HW */ wmb(); tx_desc->ctrl.owner_opcode = op_own; wmb(); writel(cpu_to_be32(ring->doorbell_qpn), ring->bf.uar->map + MLX4_SEND_DOORBELL); } return 0; tx_drop: *mbp = NULL; m_freem(mb); return EINVAL; } static int mlx4_en_transmit_locked(struct ifnet *dev, int tx_ind, struct mbuf *m) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_tx_ring *ring; struct mbuf *next; int enqueued, err = 0; ring = priv->tx_ring[tx_ind]; if ((dev->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || priv->port_up == 0) { if (m != NULL) err = drbr_enqueue(dev, ring->br, m); return (err); } enqueued = 0; if (m != NULL) { if ((err = drbr_enqueue(dev, ring->br, m)) != 0) return (err); } /* Process the queue */ while ((next = drbr_peek(dev, ring->br)) != NULL) { if ((err = mlx4_en_xmit(dev, tx_ind, &next)) != 0) { if (next == NULL) { drbr_advance(dev, ring->br); } else { drbr_putback(dev, 
ring->br, next); } break; } drbr_advance(dev, ring->br); enqueued++; dev->if_obytes += next->m_pkthdr.len; if (next->m_flags & M_MCAST) dev->if_omcasts++; if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } if (enqueued > 0) ring->watchdog_time = ticks; return (err); } void mlx4_en_tx_que(void *context, int pending) { struct mlx4_en_tx_ring *ring; struct mlx4_en_priv *priv; struct net_device *dev; struct mlx4_en_cq *cq; int tx_ind; cq = context; dev = cq->dev; priv = dev->if_softc; tx_ind = cq->ring; ring = priv->tx_ring[tx_ind]; if (dev->if_drv_flags & IFF_DRV_RUNNING) { mlx4_en_xmit_poll(priv, tx_ind); spin_lock(&ring->tx_lock); if (!drbr_empty(dev, ring->br)) mlx4_en_transmit_locked(dev, tx_ind, NULL); spin_unlock(&ring->tx_lock); } } int mlx4_en_transmit(struct ifnet *dev, struct mbuf *m) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_tx_ring *ring; struct mlx4_en_cq *cq; int i = 0, err = 0; /* Which queue to use */ if ((m->m_flags & (M_FLOWID | M_VLANTAG)) == M_FLOWID) { i = m->m_pkthdr.flowid % (priv->tx_ring_num - 1); } else { i = mlx4_en_select_queue(dev, m); } ring = priv->tx_ring[i]; if (spin_trylock(&ring->tx_lock)) { err = mlx4_en_transmit_locked(dev, i, m); spin_unlock(&ring->tx_lock); /* Poll CQ here */ mlx4_en_xmit_poll(priv, i); } else { err = drbr_enqueue(dev, ring->br, m); cq = priv->tx_cq[i]; taskqueue_enqueue(cq->tq, &cq->cq_task); } return (err); } /* * Flush ring buffers. 
*/ void mlx4_en_qflush(struct ifnet *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_tx_ring *ring; struct mbuf *m; for (int i = 0; i < priv->tx_ring_num; i++) { ring = priv->tx_ring[i]; spin_lock(&ring->tx_lock); while ((m = buf_ring_dequeue_sc(ring->br)) != NULL) m_freem(m); spin_unlock(&ring->tx_lock); } if_qflush(dev); } Index: stable/9/sys/ofed/drivers/net/mlx4/main.c =================================================================== --- stable/9/sys/ofed/drivers/net/mlx4/main.c (revision 279733) +++ stable/9/sys/ofed/drivers/net/mlx4/main.c (revision 279734) @@ -1,3798 +1,3816 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include /* * kmod.h must be included before module.h since it includes (indirectly) sys/module.h * To use the FBSD macro sys/module.h should define MODULE_VERSION before linux/module does. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "mlx4.h" #include "fw.h" #include "icm.h" #include "mlx4_stats.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver"); MODULE_LICENSE("Dual BSD/GPL"); struct workqueue_struct *mlx4_wq; #ifdef CONFIG_MLX4_DEBUG int mlx4_debug_level = 0; module_param_named(debug_level, mlx4_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); #endif /* CONFIG_MLX4_DEBUG */ #ifdef CONFIG_PCI_MSI static int msi_x = 1; module_param(msi_x, int, 0444); MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number of MSI-X irqs to msi_x (non-SRIOV only)"); #else /* CONFIG_PCI_MSI */ #define msi_x (0) #endif /* CONFIG_PCI_MSI */ static int enable_sys_tune = 0; module_param(enable_sys_tune, int, 0444); MODULE_PARM_DESC(enable_sys_tune, "Tune the cpu's for better performance (default 0)"); int mlx4_blck_lb = 1; module_param_named(block_loopback, mlx4_blck_lb, int, 0644); MODULE_PARM_DESC(block_loopback, "Block multicast loopback packets if > 0 " "(default: 1)"); enum { DEFAULT_DOMAIN = 0, BDF_STR_SIZE = 8, /* bb:dd.f- */ DBDF_STR_SIZE = 13 /* mmmm:bb:dd.f- */ }; enum { NUM_VFS, PROBE_VF, PORT_TYPE_ARRAY }; enum { VALID_DATA, INVALID_DATA, INVALID_STR }; struct param_data { int id; struct mlx4_dbdf2val_lst dbdf2val; }; static struct param_data num_vfs = { .id = NUM_VFS, .dbdf2val = { .name = "num_vfs param", .num_vals = 1, .def_val = 
{0}, .range = {0, MLX4_MAX_NUM_VF} } }; module_param_string(num_vfs, num_vfs.dbdf2val.str, sizeof(num_vfs.dbdf2val.str), 0444); MODULE_PARM_DESC(num_vfs, "Either single value (e.g. '5') to define uniform num_vfs value for all devices functions\n" "\t\tor a string to map device function numbers to their num_vfs values (e.g. '0000:04:00.0-5,002b:1c:0b.a-15').\n" "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for num_vfs value (e.g. 15)."); static struct param_data probe_vf = { .id = PROBE_VF, .dbdf2val = { .name = "probe_vf param", .num_vals = 1, .def_val = {0}, .range = {0, MLX4_MAX_NUM_VF} } }; module_param_string(probe_vf, probe_vf.dbdf2val.str, sizeof(probe_vf.dbdf2val.str), 0444); MODULE_PARM_DESC(probe_vf, "Either single value (e.g. '3') to define uniform number of VFs to probe by the pf driver for all devices functions\n" "\t\tor a string to map device function numbers to their probe_vf values (e.g. '0000:04:00.0-3,002b:1c:0b.a-13').\n" "\t\tHexadecimal digits for the device function (e.g. 002b:1c:0b.a) and decimal for probe_vf value (e.g. 13)."); int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE; module_param_named(log_num_mgm_entry_size, mlx4_log_num_mgm_entry_size, int, 0444); MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " of qp per mcg, for example:" " 10 gives 248.range: 7 <=" " log_num_mgm_entry_size <= 12." 
" To activate device managed" " flow steering when available, set to -1"); static int high_rate_steer; module_param(high_rate_steer, int, 0444); MODULE_PARM_DESC(high_rate_steer, "Enable steering mode for higher packet rate" " (default off)"); static int fast_drop; module_param_named(fast_drop, fast_drop, int, 0444); MODULE_PARM_DESC(fast_drop, "Enable fast packet drop when no recieve WQEs are posted"); int mlx4_enable_64b_cqe_eqe = 1; module_param_named(enable_64b_cqe_eqe, mlx4_enable_64b_cqe_eqe, int, 0644); MODULE_PARM_DESC(enable_64b_cqe_eqe, "Enable 64 byte CQEs/EQEs when the the FW supports this if non-zero (default: 1)"); #define HCA_GLOBAL_CAP_MASK 0 #define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE static char mlx4_version[] __devinitdata = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; static int log_num_mac = 7; module_param_named(log_num_mac, log_num_mac, int, 0444); MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)"); static int log_num_vlan; module_param_named(log_num_vlan, log_num_vlan, int, 0444); MODULE_PARM_DESC(log_num_vlan, "(Obsolete) Log2 max number of VLANs per ETH port (0-7)"); /* Log2 max number of VLANs per ETH port (0-7) */ #define MLX4_LOG_NUM_VLANS 7 int log_mtts_per_seg = ilog2(1); module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment " "(0-7) (default: 0)"); static struct param_data port_type_array = { .id = PORT_TYPE_ARRAY, .dbdf2val = { .name = "port_type_array param", .num_vals = 2, .def_val = {MLX4_PORT_TYPE_ETH, MLX4_PORT_TYPE_ETH}, .range = {MLX4_PORT_TYPE_IB, MLX4_PORT_TYPE_NA} } }; module_param_string(port_type_array, port_type_array.dbdf2val.str, sizeof(port_type_array.dbdf2val.str), 0444); MODULE_PARM_DESC(port_type_array, "Either pair of values (e.g. 
'1,2') to define uniform port1/port2 types configuration for all devices functions\n" "\t\tor a string to map device function numbers to their pair of port types values (e.g. '0000:04:00.0-1;2,002b:1c:0b.a-1;1').\n" "\t\tValid port types: 1-ib, 2-eth, 3-auto, 4-N/A\n" "\t\tIn case that only one port is available use the N/A port type for port2 (e.g '1,4')."); struct mlx4_port_config { struct list_head list; enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; struct pci_dev *pdev; }; #define MLX4_LOG_NUM_MTT 20 /* We limit to 30 as of a bit map issue which uses int and not uint. see mlx4_buddy_init -> bitmap_zero which gets int. */ #define MLX4_MAX_LOG_NUM_MTT 30 static struct mlx4_profile mod_param_profile = { .num_qp = 19, .num_srq = 16, .rdmarc_per_qp = 4, .num_cq = 16, .num_mcg = 13, .num_mpt = 19, .num_mtt_segs = 0, /* max(20, 2*MTTs for host memory)) */ }; module_param_named(log_num_qp, mod_param_profile.num_qp, int, 0444); MODULE_PARM_DESC(log_num_qp, "log maximum number of QPs per HCA (default: 19)"); module_param_named(log_num_srq, mod_param_profile.num_srq, int, 0444); MODULE_PARM_DESC(log_num_srq, "log maximum number of SRQs per HCA " "(default: 16)"); module_param_named(log_rdmarc_per_qp, mod_param_profile.rdmarc_per_qp, int, 0444); MODULE_PARM_DESC(log_rdmarc_per_qp, "log number of RDMARC buffers per QP " "(default: 4)"); module_param_named(log_num_cq, mod_param_profile.num_cq, int, 0444); MODULE_PARM_DESC(log_num_cq, "log maximum number of CQs per HCA (default: 16)"); module_param_named(log_num_mcg, mod_param_profile.num_mcg, int, 0444); MODULE_PARM_DESC(log_num_mcg, "log maximum number of multicast groups per HCA " "(default: 13)"); module_param_named(log_num_mpt, mod_param_profile.num_mpt, int, 0444); MODULE_PARM_DESC(log_num_mpt, "log maximum number of memory protection table entries per " "HCA (default: 19)"); module_param_named(log_num_mtt, mod_param_profile.num_mtt_segs, int, 0444); MODULE_PARM_DESC(log_num_mtt, "log maximum number of memory 
translation table segments per " "HCA (default: max(20, 2*MTTs for register all of the host memory limited to 30))"); enum { MLX4_IF_STATE_BASIC, MLX4_IF_STATE_EXTENDED }; static inline u64 dbdf_to_u64(int domain, int bus, int dev, int fn) { return (domain << 20) | (bus << 12) | (dev << 4) | fn; } static inline void pr_bdf_err(const char *dbdf, const char *pname) { pr_warn("mlx4_core: '%s' is not valid bdf in '%s'\n", dbdf, pname); } static inline void pr_val_err(const char *dbdf, const char *pname, const char *val) { pr_warn("mlx4_core: value '%s' of bdf '%s' in '%s' is not valid\n" , val, dbdf, pname); } static inline void pr_out_of_range_bdf(const char *dbdf, int val, struct mlx4_dbdf2val_lst *dbdf2val) { pr_warn("mlx4_core: value %d in bdf '%s' of '%s' is out of its valid range (%d,%d)\n" , val, dbdf, dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); } static inline void pr_out_of_range(struct mlx4_dbdf2val_lst *dbdf2val) { pr_warn("mlx4_core: value of '%s' is out of its valid range (%d,%d)\n" , dbdf2val->name , dbdf2val->range.min, dbdf2val->range.max); } static inline int is_in_range(int val, struct mlx4_range *r) { return (val >= r->min && val <= r->max); } static int update_defaults(struct param_data *pdata) { long int val[MLX4_MAX_BDF_VALS]; int ret; char *t, *p = pdata->dbdf2val.str; char sval[32]; int val_len; if (!strlen(p) || strchr(p, ':') || strchr(p, '.') || strchr(p, ';')) return INVALID_STR; switch (pdata->id) { case PORT_TYPE_ARRAY: t = strchr(p, ','); if (!t || t == p || (t - p) > sizeof(sval)) return INVALID_STR; val_len = t - p; strncpy(sval, p, val_len); sval[val_len] = 0; ret = kstrtol(sval, 0, &val[0]); if (ret == -EINVAL) return INVALID_STR; if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { pr_out_of_range(&pdata->dbdf2val); return INVALID_DATA; } ret = kstrtol(t + 1, 0, &val[1]); if (ret == -EINVAL) return INVALID_STR; if (ret || !is_in_range(val[1], &pdata->dbdf2val.range)) { pr_out_of_range(&pdata->dbdf2val); return 
INVALID_DATA; } pdata->dbdf2val.tbl[0].val[0] = val[0]; pdata->dbdf2val.tbl[0].val[1] = val[1]; break; case NUM_VFS: case PROBE_VF: ret = kstrtol(p, 0, &val[0]); if (ret == -EINVAL) return INVALID_STR; if (ret || !is_in_range(val[0], &pdata->dbdf2val.range)) { pr_out_of_range(&pdata->dbdf2val); return INVALID_DATA; } pdata->dbdf2val.tbl[0].val[0] = val[0]; break; } pdata->dbdf2val.tbl[1].dbdf = MLX4_ENDOF_TBL; return VALID_DATA; } int mlx4_fill_dbdf2val_tbl(struct mlx4_dbdf2val_lst *dbdf2val_lst) { int domain, bus, dev, fn; u64 dbdf; char *p, *t, *v; char tmp[32]; char sbdf[32]; char sep = ','; int j, k, str_size, i = 1; int prfx_size; p = dbdf2val_lst->str; for (j = 0; j < dbdf2val_lst->num_vals; j++) dbdf2val_lst->tbl[0].val[j] = dbdf2val_lst->def_val[j]; dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; str_size = strlen(dbdf2val_lst->str); if (str_size == 0) return 0; while (strlen(p)) { prfx_size = BDF_STR_SIZE; sbdf[prfx_size] = 0; strncpy(sbdf, p, prfx_size); domain = DEFAULT_DOMAIN; if (sscanf(sbdf, "%02x:%02x.%x-", &bus, &dev, &fn) != 3) { prfx_size = DBDF_STR_SIZE; sbdf[prfx_size] = 0; strncpy(sbdf, p, prfx_size); if (sscanf(sbdf, "%04x:%02x:%02x.%x-", &domain, &bus, &dev, &fn) != 4) { pr_bdf_err(sbdf, dbdf2val_lst->name); goto err; } sprintf(tmp, "%04x:%02x:%02x.%x-", domain, bus, dev, fn); } else { sprintf(tmp, "%02x:%02x.%x-", bus, dev, fn); } if (strnicmp(sbdf, tmp, sizeof(tmp))) { pr_bdf_err(sbdf, dbdf2val_lst->name); goto err; } dbdf = dbdf_to_u64(domain, bus, dev, fn); for (j = 1; j < i; j++) if (dbdf2val_lst->tbl[j].dbdf == dbdf) { pr_warn("mlx4_core: in '%s', %s appears multiple times\n" , dbdf2val_lst->name, sbdf); goto err; } if (i >= MLX4_DEVS_TBL_SIZE) { pr_warn("mlx4_core: Too many devices in '%s'\n" , dbdf2val_lst->name); goto err; } p += prfx_size; t = strchr(p, sep); t = t ? 
t : p + strlen(p); if (p >= t) { pr_val_err(sbdf, dbdf2val_lst->name, ""); goto err; } for (k = 0; k < dbdf2val_lst->num_vals; k++) { char sval[32]; long int val; int ret, val_len; char vsep = ';'; v = (k == dbdf2val_lst->num_vals - 1) ? t : strchr(p, vsep); if (!v || v > t || v == p || (v - p) > sizeof(sval)) { pr_val_err(sbdf, dbdf2val_lst->name, p); goto err; } val_len = v - p; strncpy(sval, p, val_len); sval[val_len] = 0; ret = kstrtol(sval, 0, &val); if (ret) { if (strchr(p, vsep)) pr_warn("mlx4_core: too many vals in bdf '%s' of '%s'\n" , sbdf, dbdf2val_lst->name); else pr_val_err(sbdf, dbdf2val_lst->name, sval); goto err; } if (!is_in_range(val, &dbdf2val_lst->range)) { pr_out_of_range_bdf(sbdf, val, dbdf2val_lst); goto err; } dbdf2val_lst->tbl[i].val[k] = val; p = v; if (p[0] == vsep) p++; } dbdf2val_lst->tbl[i].dbdf = dbdf; if (strlen(p)) { if (p[0] != sep) { pr_warn("mlx4_core: expect separator '%c' before '%s' in '%s'\n" , sep, p, dbdf2val_lst->name); goto err; } p++; } i++; if (i < MLX4_DEVS_TBL_SIZE) dbdf2val_lst->tbl[i].dbdf = MLX4_ENDOF_TBL; } return 0; err: dbdf2val_lst->tbl[1].dbdf = MLX4_ENDOF_TBL; pr_warn("mlx4_core: The value of '%s' is incorrect. 
The value is discarded!\n" , dbdf2val_lst->name); return -EINVAL; } EXPORT_SYMBOL(mlx4_fill_dbdf2val_tbl); int mlx4_get_val(struct mlx4_dbdf2val *tbl, struct pci_dev *pdev, int idx, int *val) { u64 dbdf; int i = 1; *val = tbl[0].val[idx]; if (!pdev) return -EINVAL; dbdf = dbdf_to_u64(pci_get_domain(pdev->dev.bsddev), pci_get_bus(pdev->dev.bsddev), PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); while ((i < MLX4_DEVS_TBL_SIZE) && (tbl[i].dbdf != MLX4_ENDOF_TBL)) { if (tbl[i].dbdf == dbdf) { *val = tbl[i].val[idx]; return 0; } i++; } return 0; } EXPORT_SYMBOL(mlx4_get_val); static void process_mod_param_profile(struct mlx4_profile *profile) { vm_size_t hwphyssz; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.realmem", (u_long *) &hwphyssz); profile->num_qp = 1 << mod_param_profile.num_qp; profile->num_srq = 1 << mod_param_profile.num_srq; profile->rdmarc_per_qp = 1 << mod_param_profile.rdmarc_per_qp; profile->num_cq = 1 << mod_param_profile.num_cq; profile->num_mcg = 1 << mod_param_profile.num_mcg; profile->num_mpt = 1 << mod_param_profile.num_mpt; /* * We want to scale the number of MTTs with the size of the * system memory, since it makes sense to register a lot of * memory on a system with a lot of memory. As a heuristic, * make sure we have enough MTTs to register twice the system * memory (with PAGE_SIZE entries). * * This number has to be a power of two and fit into 32 bits * due to device limitations. We cap this at 2^30 as of bit map * limitation to work with int instead of uint (mlx4_buddy_init -> bitmap_zero) * That limits us to 4TB of memory registration per HCA with * 4KB pages, which is probably OK for the next few months. 
*/ if (mod_param_profile.num_mtt_segs) profile->num_mtt_segs = 1 << mod_param_profile.num_mtt_segs; else { profile->num_mtt_segs = roundup_pow_of_two(max_t(unsigned, 1 << (MLX4_LOG_NUM_MTT - log_mtts_per_seg), min(1UL << (MLX4_MAX_LOG_NUM_MTT - log_mtts_per_seg), (hwphyssz << 1) >> log_mtts_per_seg))); /* set the actual value, so it will be reflected to the user using the sysfs */ mod_param_profile.num_mtt_segs = ilog2(profile->num_mtt_segs); } } int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type) { int i; for (i = 0; i < dev->caps.num_ports - 1; i++) { if (port_type[i] != port_type[i + 1]) { if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) { mlx4_err(dev, "Only same port types supported " "on this HCA, aborting.\n"); return -EINVAL; } } } for (i = 0; i < dev->caps.num_ports; i++) { if (!(port_type[i] & dev->caps.supported_type[i+1])) { mlx4_err(dev, "Requested port type for port %d is not " "supported on this HCA\n", i + 1); return -EINVAL; } } return 0; } static void mlx4_set_port_mask(struct mlx4_dev *dev) { int i; for (i = 1; i <= dev->caps.num_ports; ++i) dev->caps.port_mask[i] = dev->caps.port_type[i]; } static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { int err; int i; err = mlx4_QUERY_DEV_CAP(dev, dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); return err; } if (dev_cap->min_page_sz > PAGE_SIZE) { mlx4_err(dev, "HCA minimum page size of %d bigger than " "kernel PAGE_SIZE of %d, aborting.\n", dev_cap->min_page_sz, PAGE_SIZE); return -ENODEV; } if (dev_cap->num_ports > MLX4_MAX_PORTS) { mlx4_err(dev, "HCA has %d ports, but we only support %d, " "aborting.\n", dev_cap->num_ports, MLX4_MAX_PORTS); return -ENODEV; } if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than " "PCI resource 2 size of 0x%llx, aborting.\n", dev_cap->uar_size, (unsigned long long) pci_resource_len(dev->pdev, 2)); return -ENODEV; } 
dev->caps.num_ports = dev_cap->num_ports; dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.vl_cap[i] = dev_cap->max_vl[i]; dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; /* set gid and pkey table operating lengths by default * to non-sriov values */ dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; dev->caps.def_mac[i] = dev_cap->def_mac[i]; dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; dev->caps.suggested_type[i] = dev_cap->suggested_type[i]; dev->caps.default_sense[i] = dev_cap->default_sense[i]; dev->caps.trans_type[i] = dev_cap->trans_type[i]; dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; dev->caps.wavelength[i] = dev_cap->wavelength[i]; dev->caps.trans_code[i] = dev_cap->trans_code[i]; } dev->caps.uar_page_size = PAGE_SIZE; dev->caps.num_uars = dev_cap->uar_size / PAGE_SIZE; dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay; dev->caps.bf_reg_size = dev_cap->bf_reg_size; dev->caps.bf_regs_per_page = dev_cap->bf_regs_per_page; dev->caps.max_sq_sg = dev_cap->max_sq_sg; dev->caps.max_rq_sg = dev_cap->max_rq_sg; dev->caps.max_wqes = dev_cap->max_qp_sz; dev->caps.max_qp_init_rdma = dev_cap->max_requester_per_qp; dev->caps.max_srq_wqes = dev_cap->max_srq_sz; dev->caps.max_srq_sge = dev_cap->max_rq_sg - 1; dev->caps.reserved_srqs = dev_cap->reserved_srqs; dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; /* * Subtract 1 from the limit because we need to allocate a * spare CQE to enable resizing the CQ */ dev->caps.max_cqes = dev_cap->max_cq_sz - 1; dev->caps.reserved_cqs = dev_cap->reserved_cqs; dev->caps.reserved_eqs = dev_cap->reserved_eqs; 
dev->caps.reserved_mtts = dev_cap->reserved_mtts; dev->caps.reserved_mrws = dev_cap->reserved_mrws; /* The first 128 UARs are used for EQ doorbells */ dev->caps.reserved_uars = max_t(int, 128, dev_cap->reserved_uars); dev->caps.reserved_pds = dev_cap->reserved_pds; dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? dev_cap->reserved_xrcds : 0; dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? dev_cap->max_xrcds : 0; dev->caps.mtt_entry_sz = dev_cap->mtt_entry_sz; dev->caps.max_msg_sz = dev_cap->max_msg_sz; dev->caps.page_size_cap = ~(u32) (dev_cap->min_page_sz - 1); dev->caps.flags = dev_cap->flags; dev->caps.flags2 = dev_cap->flags2; dev->caps.bmme_flags = dev_cap->bmme_flags; dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; dev->caps.cq_timestamp = dev_cap->timestamp_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; /* Don't do sense port on multifunction devices (for now at least) */ if (mlx4_is_mfunc(dev)) dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; dev->caps.log_num_macs = log_num_mac; dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; dev->caps.fast_drop = fast_drop ? 
!!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FAST_DROP) : 0; for (i = 1; i <= dev->caps.num_ports; ++i) { dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE; if (dev->caps.supported_type[i]) { /* if only ETH is supported - assign ETH */ if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH) dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH; /* if only IB is supported, assign IB */ else if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_IB) dev->caps.port_type[i] = MLX4_PORT_TYPE_IB; else { /* * if IB and ETH are supported, we set the port * type according to user selection of port type; * if there is no user selection, take the FW hint */ int pta; mlx4_get_val(port_type_array.dbdf2val.tbl, pci_physfn(dev->pdev), i - 1, &pta); if (pta == MLX4_PORT_TYPE_NONE) { dev->caps.port_type[i] = dev->caps.suggested_type[i] ? MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB; } else if (pta == MLX4_PORT_TYPE_NA) { mlx4_err(dev, "Port %d is valid port. " "It is not allowed to configure its type to N/A(%d)\n", i, MLX4_PORT_TYPE_NA); return -EINVAL; } else { dev->caps.port_type[i] = pta; } } } /* * Link sensing is allowed on the port if 3 conditions are true: * 1. Both protocols are supported on the port. * 2. Different types are supported on the port * 3. 
FW declared that it supports link sensing */ mlx4_priv(dev)->sense.sense_allowed[i] = ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) && (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) && (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)); /* Disablling auto sense for default Eth ports support */ mlx4_priv(dev)->sense.sense_allowed[i] = 0; /* * If "default_sense" bit is set, we move the port to "AUTO" mode * and perform sense_port FW command to try and set the correct * port type from beginning */ if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) { enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE; dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO; mlx4_SENSE_PORT(dev, i, &sensed_port); if (sensed_port != MLX4_PORT_TYPE_NONE) dev->caps.port_type[i] = sensed_port; } else { dev->caps.possible_type[i] = dev->caps.port_type[i]; } if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { dev->caps.log_num_macs = dev_cap->log_max_macs[i]; mlx4_warn(dev, "Requested number of MACs is too much " "for port %d, reducing to %d.\n", i, 1 << dev->caps.log_num_macs); } if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) { dev->caps.log_num_vlans = dev_cap->log_max_vlans[i]; mlx4_warn(dev, "Requested number of VLANs is too much " "for port %d, reducing to %d.\n", i, 1 << dev->caps.log_num_vlans); } } dev->caps.max_basic_counters = dev_cap->max_basic_counters; dev->caps.max_extended_counters = dev_cap->max_extended_counters; /* support extended counters if available */ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS_EXT) dev->caps.max_counters = dev->caps.max_extended_counters; else dev->caps.max_counters = dev->caps.max_basic_counters; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = (1 << dev->caps.log_num_macs) * (1 << dev->caps.log_num_vlans) * dev->caps.num_ports; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = 
MLX4_NUM_FEXCH; dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; dev->caps.sync_qp = dev_cap->sync_qp; if (dev->pdev->device == 0x1003) dev->caps.cq_flags |= MLX4_DEV_CAP_CQ_FLAG_IO; dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; if (!mlx4_enable_64b_cqe_eqe && !mlx4_is_slave(dev)) { if (dev_cap->flags & (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) { mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n"); dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE; dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE; } } if ((dev->caps.flags & (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) && mlx4_is_master(dev)) dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; if (!mlx4_is_slave(dev)) { for (i = 0; i < dev->caps.num_ports; ++i) dev->caps.def_counter_index[i] = i << 1; } return 0; } /*The function checks if there are live vf, return the num of them*/ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state; int i; int ret = 0; for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) { s_state = &priv->mfunc.master.slave_state[i]; if (s_state->active && s_state->last_cmd != MLX4_COMM_CMD_RESET) { mlx4_warn(dev, "%s: slave: %d is still active\n", __func__, i); ret++; } } return ret; } int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) { u32 qk = MLX4_RESERVED_QKEY_BASE; if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || qpn < dev->phys_caps.base_proxy_sqpn) return -EINVAL; if (qpn >= dev->phys_caps.base_tunnel_sqpn) /* tunnel qp */ qk += qpn - dev->phys_caps.base_tunnel_sqpn; else qk += qpn - dev->phys_caps.base_proxy_sqpn; *qkey = qk; return 0; } EXPORT_SYMBOL(mlx4_get_parav_qkey); void mlx4_sync_pkey_table(struct mlx4_dev *dev, int 
slave, int port, int i, int val) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return; priv->virt2phys_pkey[slave][port - 1][i] = val; } EXPORT_SYMBOL(mlx4_sync_pkey_table); void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return; priv->slave_node_guids[slave] = guid; } EXPORT_SYMBOL(mlx4_put_slave_node_guid); __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); if (!mlx4_is_master(dev)) return 0; return priv->slave_node_guids[slave]; } EXPORT_SYMBOL(mlx4_get_slave_node_guid); int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_slave; if (!mlx4_is_master(dev)) return 0; s_slave = &priv->mfunc.master.slave_state[slave]; return !!s_slave->active; } EXPORT_SYMBOL(mlx4_is_slave_active); static void slave_adjust_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, struct mlx4_init_hca_param *hca_param) { dev->caps.steering_mode = hca_param->steering_mode; if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry; else dev->caps.num_qp_per_mgm = 4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2); mlx4_dbg(dev, "Steering mode is: %s\n", mlx4_steering_mode_str(dev->caps.steering_mode)); } static int mlx4_slave_cap(struct mlx4_dev *dev) { int err; u32 page_size; struct mlx4_dev_cap dev_cap; struct mlx4_func_cap func_cap; struct mlx4_init_hca_param hca_param; int i; memset(&hca_param, 0, sizeof(hca_param)); err = mlx4_QUERY_HCA(dev, &hca_param); if (err) { mlx4_err(dev, "QUERY_HCA command failed, aborting.\n"); return err; } /*fail if the hca has an unknown capability */ if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) != HCA_GLOBAL_CAP_MASK) { mlx4_err(dev, "Unknown hca 
global capabilities\n"); return -ENOSYS; } mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz; dev->caps.hca_core_clock = hca_param.hca_core_clock; memset(&dev_cap, 0, sizeof(dev_cap)); dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; err = mlx4_dev_cap(dev, &dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); return err; } err = mlx4_QUERY_FW(dev); if (err) mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n"); if (!hca_param.mw_enable) { dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; dev->caps.bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; } page_size = ~dev->caps.page_size_cap + 1; mlx4_warn(dev, "HCA minimum page size:%d\n", page_size); if (page_size > PAGE_SIZE) { mlx4_err(dev, "HCA minimum page size of %d bigger than " "kernel PAGE_SIZE of %d, aborting.\n", page_size, PAGE_SIZE); return -ENODEV; } /* slave gets uar page size from QUERY_HCA fw command */ dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12); /* TODO: relax this assumption */ if (dev->caps.uar_page_size != PAGE_SIZE) { mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %d\n", dev->caps.uar_page_size, PAGE_SIZE); return -ENODEV; } memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", err); return err; } if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != PF_CONTEXT_BEHAVIOUR_MASK) { mlx4_err(dev, "Unknown pf context behaviour\n"); return -ENOSYS; } dev->caps.num_ports = func_cap.num_ports; dev->quotas.qp = func_cap.qp_quota; dev->quotas.srq = func_cap.srq_quota; dev->quotas.cq = func_cap.cq_quota; dev->quotas.mpt = func_cap.mpt_quota; dev->quotas.mtt = func_cap.mtt_quota; dev->caps.num_qps = 1 << hca_param.log_num_qps; dev->caps.num_srqs = 1 << hca_param.log_num_srqs; dev->caps.num_cqs = 1 << hca_param.log_num_cqs; dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; dev->caps.num_eqs = func_cap.max_eq; 
dev->caps.reserved_eqs = func_cap.reserved_eq; dev->caps.num_pds = MLX4_NUM_PDS; dev->caps.num_mgms = 0; dev->caps.num_amgms = 0; if (dev->caps.num_ports > MLX4_MAX_PORTS) { mlx4_err(dev, "HCA has %d ports, but we only support %d, " "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS); return -ENODEV; } dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { err = -ENOMEM; goto err_mem; } for (i = 1; i <= dev->caps.num_ports; ++i) { err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" " port %d, aborting (%d).\n", i, err); goto err_mem; } dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; dev->caps.def_counter_index[i - 1] = func_cap.def_counter_index; dev->caps.port_mask[i] = dev->caps.port_type[i]; err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, &dev->caps.gid_table_len[i], &dev->caps.pkey_table_len[i]); if (err) goto err_mem; } if (dev->caps.uar_page_size * (dev->caps.num_uars - dev->caps.reserved_uars) > pci_resource_len(dev->pdev, 2)) { mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than " "PCI resource 2 size of 0x%llx, aborting.\n", dev->caps.uar_page_size * dev->caps.num_uars, (unsigned long long) pci_resource_len(dev->pdev, 2)); err = -ENOMEM; goto err_mem; } if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { dev->caps.eqe_size = 64; dev->caps.eqe_factor = 1; } else { dev->caps.eqe_size = 32; dev->caps.eqe_factor = 0; } if (hca_param.dev_cap_enabled & 
MLX4_DEV_CAP_64B_CQE_ENABLED) { dev->caps.cqe_size = 64; dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE; } else { dev->caps.cqe_size = 32; } dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_warn(dev, "Timestamping is not supported in slave mode.\n"); slave_adjust_steering_mode(dev, &dev_cap, &hca_param); return 0; err_mem: kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); dev->caps.qp0_tunnel = dev->caps.qp0_proxy = dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; return err; } static void mlx4_request_modules(struct mlx4_dev *dev) { int port; int has_ib_port = false; int has_eth_port = false; #define EN_DRV_NAME "mlx4_en" #define IB_DRV_NAME "mlx4_ib" for (port = 1; port <= dev->caps.num_ports; port++) { if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB) has_ib_port = true; else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) has_eth_port = true; } if (has_ib_port) request_module_nowait(IB_DRV_NAME); if (has_eth_port) request_module_nowait(EN_DRV_NAME); } /* * Change the port configuration of the device. * Every user of this function must hold the port mutex. 
*/
/*
 * Apply a new set of port types to the device.
 *
 * port_types[] is indexed from 0, dev->caps.port_type[] from 1.  When at
 * least one requested type differs from the current one the device is
 * unregistered, every port is closed and re-programmed with
 * mlx4_SET_PORT(), and the device is registered again.
 *
 * Returns 0 on success (or when nothing changed), otherwise the error
 * returned by mlx4_SET_PORT() or mlx4_register_device().  On those error
 * paths the function returns without re-registering the device.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_types)
{
	int err = 0;
	int change = 0;
	int port;

	for (port = 0; port < dev->caps.num_ports; port++) {
		/* Change the port type only if the new type is different
		 * from the current, and not set to Auto */
		if (port_types[port] != dev->caps.port_type[port + 1])
			change = 1;
	}
	if (change) {
		mlx4_unregister_device(dev);
		for (port = 1; port <= dev->caps.num_ports; port++) {
			mlx4_CLOSE_PORT(dev, port);
			dev->caps.port_type[port] = port_types[port - 1];
			err = mlx4_SET_PORT(dev, port, -1);
			if (err) {
				mlx4_err(dev, "Failed to set port %d, "
					      "aborting\n", port);
				goto out;
			}
		}
		mlx4_set_port_mask(dev);
		err = mlx4_register_device(dev);
		if (err) {
			mlx4_err(dev, "Failed to register device\n");
			goto out;
		}
		mlx4_request_modules(dev);
	}

out:
	return err;
}

/*
 * Device-attribute "show" callback for the port type.
 *
 * Writes "ib" or "eth" (the current type) into buf, wrapped as
 * "auto (<type>)" when the port's possible type is MLX4_PORT_TYPE_AUTO.
 * Returns the length of the string written.
 */
static ssize_t show_port_type(struct device *dev,
			      struct device_attribute *attr,
			      char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	char type[8];

	sprintf(type, "%s",
		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
		"ib" : "eth");
	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
		sprintf(buf, "auto (%s)\n", type);
	else
		sprintf(buf, "%s\n", type);

	return strlen(buf);
}

/*
 * Device-attribute "store" callback for the port type.
 *
 * Accepts exactly "ib\n", "eth\n" or "auto\n".  The request is recorded in
 * info->tmp_type, checked against the port's supported types, combined
 * with the pending requests of the other ports, validated with
 * mlx4_check_port_params() and finally applied through
 * mlx4_change_port_types().
 *
 * Returns count on success or a negative errno.
 */
static ssize_t set_port_type(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	enum mlx4_port_type types[MLX4_MAX_PORTS];
	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
	int i;
	int err = 0;

	if (!strcmp(buf, "ib\n"))
		info->tmp_type = MLX4_PORT_TYPE_IB;
	else if (!strcmp(buf, "eth\n"))
		info->tmp_type = MLX4_PORT_TYPE_ETH;
	else if (!strcmp(buf, "auto\n"))
		info->tmp_type = MLX4_PORT_TYPE_AUTO;
	else {
		mlx4_err(mdev, "%s is not supported port type\n", buf);
		return -EINVAL;
	}

	/* Every bit of the requested type must be in the supported mask. */
	if ((info->tmp_type & mdev->caps.supported_type[info->port]) !=
	    info->tmp_type) {
		mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n",
			 info->port);
		return -EINVAL;
	}

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	/* Possible type is always the one that was delivered */
	mdev->caps.possible_type[info->port] = info->tmp_type;

	/*
	 * Build the requested type for every port: a pending tmp_type wins
	 * over the possible type, and AUTO resolves to the current type.
	 */
	for (i = 0; i < mdev->caps.num_ports; i++) {
		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
					mdev->caps.possible_type[i+1];
		if (types[i] == MLX4_PORT_TYPE_AUTO)
			types[i] = mdev->caps.port_type[i+1];
	}

	/*
	 * With neither the DPDP nor the SENSE_SUPPORT capability flag set,
	 * AUTO is rejected: possible_type is reset to the current type and
	 * the request fails.
	 */
	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
	    !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
		for (i = 1; i <= mdev->caps.num_ports; i++) {
			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
				err = -EINVAL;
			}
		}
	}
	if (err) {
		mlx4_err(mdev, "Auto sensing is not supported on this HCA. "
			       "Set only 'eth' or 'ib' for both ports "
			       "(should be the same)\n");
		goto out;
	}

	mlx4_do_sense_ports(mdev, new_types, types);

	err = mlx4_check_port_params(mdev, new_types);
	if (err)
		goto out;

	/* We are about to apply the changes after the configuration
	 * was verified, no need to remember the temporary types
	 * any more */
	for (i = 0; i < mdev->caps.num_ports; i++)
		priv->port[i + 1].tmp_type = 0;

	err = mlx4_change_port_types(mdev, new_types);

out:
	mlx4_start_sense(mdev);
	mutex_unlock(&priv->port_mutex);
	return err ? err : count;
}

/* MTU codes; the two helpers below map them to and from byte sizes. */
enum ibta_mtu {
	IB_MTU_256  = 1,
	IB_MTU_512  = 2,
	IB_MTU_1024 = 3,
	IB_MTU_2048 = 4,
	IB_MTU_4096 = 5
};

/* Map an MTU in bytes to its enum ibta_mtu code; -1 if not one of the five sizes. */
static inline int int_to_ibta_mtu(int mtu)
{
	switch (mtu) {
	case 256:  return IB_MTU_256;
	case 512:  return IB_MTU_512;
	case 1024: return IB_MTU_1024;
	case 2048: return IB_MTU_2048;
	case 4096: return IB_MTU_4096;
	default: return -1;
	}
}

/* Map an enum ibta_mtu code back to bytes; -1 for an unknown code. */
static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
	switch (mtu) {
	case IB_MTU_256:  return  256;
	case IB_MTU_512:  return  512;
	case IB_MTU_1024: return 1024;
	case IB_MTU_2048: return 2048;
	case IB_MTU_4096: return 4096;
	default: return -1;
	}
}

/*
 * Device-attribute "show" callback for the port MTU: prints the value of
 * mdev->caps.port_ib_mtu[] for this port converted to bytes.
 *
 * NOTE(review): the '+' / '-' prefixes below are unified-diff markers that
 * are part of this file's text; the '+' lines make the callback fail with
 * -EINVAL on an Ethernet port where the '-' line only warned.
 */
static ssize_t show_port_ib_mtu(struct device *dev,
			     struct device_attribute *attr,
			     char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_mtu_attr);
	struct mlx4_dev *mdev = info->dev;

+	/* When port type is eth, port mtu value isn't used. */
	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
-		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
+		return -EINVAL;

	sprintf(buf, "%d\n",
			ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
	return strlen(buf);
}

/*
 * Device-attribute "store" callback for the port MTU.
 *
 * Rejects Ethernet ports and any value that is not 256/512/1024/2048/4096.
 * The new code is stored in caps.port_ib_mtu[], then the device is
 * unregistered, all ports are closed and re-programmed with
 * mlx4_SET_PORT(), and the device is registered again.
 *
 * Returns count on success or a negative errno.
 */
static ssize_t set_port_ib_mtu(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_mtu_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	int err, port, mtu, ibta_mtu = -1;

	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
		return -EINVAL;
	}

	/* Base 0: simple_strtol() infers the radix from the input prefix. */
	mtu = (int) simple_strtol(buf, NULL, 0);
	ibta_mtu = int_to_ibta_mtu(mtu);

	if (ibta_mtu < 0) {
		mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf);
		return -EINVAL;
	}

	/* The capability is updated before port_mutex is taken. */
	mdev->caps.port_ib_mtu[info->port] = ibta_mtu;

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	mlx4_unregister_device(mdev);
	for (port = 1; port <= mdev->caps.num_ports; port++) {
		mlx4_CLOSE_PORT(mdev, port);
		err = mlx4_SET_PORT(mdev, port, -1);
		if (err) {
			mlx4_err(mdev, "Failed to set port %d, "
				      "aborting\n", port);
			goto err_set_port;
		}
	}

	err = mlx4_register_device(mdev);
err_set_port:
	mutex_unlock(&priv->port_mutex);
	mlx4_start_sense(mdev);
	return err ?
err : count;
}

/*
 * Allocate the firmware area (priv->fw.fw_pages pages), map it with
 * mlx4_MAP_FA() and start the firmware with mlx4_RUN_FW().
 *
 * Returns 0 on success, -ENOMEM if the area cannot be allocated, or the
 * error of the failing command.  If mlx4_UNMAP_FA() fails during unwind,
 * the area is deliberately not freed.
 */
static int mlx4_load_fw(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err, unmap_flag = 0;

	priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
					 GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.fw_icm) {
		mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
	if (err) {
		mlx4_err(dev, "MAP_FA command failed, aborting.\n");
		goto err_free;
	}

	err = mlx4_RUN_FW(dev);
	if (err) {
		mlx4_err(dev, "RUN_FW command failed, aborting.\n");
		goto err_unmap_fa;
	}

	return 0;

err_unmap_fa:
	unmap_flag = mlx4_UNMAP_FA(dev);
	if (unmap_flag)
		pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");

err_free:
	/* unmap_flag stays 0 when we arrive here directly from MAP_FA failure. */
	if (!unmap_flag)
		mlx4_free_icm(dev, priv->fw.fw_icm, 0);
	return err;
}

/*
 * Initialise the four cMPT ICM tables (QP, SRQ, CQ, EQ).
 *
 * Each table starts at cmpt_base plus
 * (type * cmpt_entry_sz) << MLX4_CMPT_SHIFT.  On failure the tables set up
 * so far are cleaned up in reverse order and the error is returned.
 */
static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
				int cmpt_entry_sz)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;
	int num_eqs;

	err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_QP *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err)
		goto err;

	err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_SRQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err)
		goto err_qp;

	err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_CQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz, dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err)
		goto err_srq;

	/* A master sizes the EQ table by the physical EQ count. */
	num_eqs = (mlx4_is_master(dev)) ?
		dev->phys_caps.num_phys_eqs : dev->caps.num_eqs;
	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
				  cmpt_base +
				  ((u64) (MLX4_CMPT_TYPE_EQ *
					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
				  cmpt_entry_sz,
				  num_eqs, num_eqs, 0, 0);
	if (err)
		goto err_cq;

	return 0;

err_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);

err_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);

err_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err:
	return err;
}

/*
 * Set the ICM size, allocate and map the auxiliary ICM area, then
 * initialise every ICM table (cMPT, EQ, MTT, dMPT, QP, AUXC, ALTC, RDMARC,
 * CQ, SRQ, MCG) from the bases in init_hca and the entry sizes in dev_cap
 * and dev->caps.
 *
 * Returns 0 on success.  On failure everything set up so far is undone in
 * reverse order through the err_* labels and the error is returned.
 */
static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
			 struct mlx4_init_hca_param *init_hca, u64 icm_size)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 aux_pages;
	int num_eqs;
	int err, unmap_flag = 0;

	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
	if (err) {
		mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n");
		return err;
	}

	mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n",
		 (unsigned long long) icm_size >> 10,
		 (unsigned long long) aux_pages << 2);

	priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
					  GFP_HIGHUSER | __GFP_NOWARN, 0);
	if (!priv->fw.aux_icm) {
		mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
		return -ENOMEM;
	}

	err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
	if (err) {
		mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n");
		goto err_free_aux;
	}

	err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
	if (err) {
		mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n");
		goto err_unmap_aux;
	}

	num_eqs = (mlx4_is_master(dev)) ?
		dev->phys_caps.num_phys_eqs : dev->caps.num_eqs;
	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
				  num_eqs, num_eqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
		goto err_unmap_cmpt;
	}

	/*
	 * Reserved MTT entries must be aligned up to a cacheline
	 * boundary, since the FW will write to them, while the driver
	 * writes to all other MTT entries. (The variable
	 * dev->caps.mtt_entry_sz below is really the MTT segment
	 * size, not the raw entry size)
	 */
	dev->caps.reserved_mtts =
		ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
		      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;

	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
				  init_hca->mtt_base,
				  dev->caps.mtt_entry_sz,
				  dev->caps.num_mtts,
				  dev->caps.reserved_mtts, 1, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
		goto err_unmap_eq;
	}

	err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
				  init_hca->dmpt_base,
				  dev_cap->dmpt_entry_sz,
				  dev->caps.num_mpts,
				  dev->caps.reserved_mrws, 1, 1);
	if (err) {
		mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
		goto err_unmap_mtt;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
				  init_hca->qpc_base,
				  dev_cap->qpc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
		goto err_unmap_dmpt;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
				  init_hca->auxc_base,
				  dev_cap->aux_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
		goto err_unmap_qp;
	}

	err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
				  init_hca->altc_base,
				  dev_cap->altc_entry_sz,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
		goto err_unmap_auxc;
	}

	/* The RDMARC entry size is scaled by qp_table.rdmarc_shift. */
	err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
				  init_hca->rdmarc_base,
				  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
				  dev->caps.num_qps,
				  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
		goto err_unmap_altc;
	}

	err = mlx4_init_icm_table(dev, &priv->cq_table.table,
				  init_hca->cqc_base,
				  dev_cap->cqc_entry_sz,
				  dev->caps.num_cqs,
				  dev->caps.reserved_cqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
		goto err_unmap_rdmarc;
	}

	err = mlx4_init_icm_table(dev, &priv->srq_table.table,
				  init_hca->srqc_base,
				  dev_cap->srq_entry_sz,
				  dev->caps.num_srqs,
				  dev->caps.reserved_srqs, 0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
		goto err_unmap_cq;
	}

	/*
	 * For flow steering device managed mode it is required to use
	 * mlx4_init_icm_table. For B0 steering mode it's not strictly
	 * required, but for simplicity just map the whole multicast
	 * group table now.  The table isn't very big and it's a lot
	 * easier than trying to track ref counts.
	 */
	err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
				  init_hca->mc_base,
				  mlx4_get_mgm_entry_size(dev),
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  dev->caps.num_mgms + dev->caps.num_amgms,
				  0, 0);
	if (err) {
		mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
		goto err_unmap_srq;
	}

	return 0;

	/* Unwind strictly in the reverse order of the setup above. */
err_unmap_srq:
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);

err_unmap_cq:
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);

err_unmap_rdmarc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);

err_unmap_altc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);

err_unmap_auxc:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);

err_unmap_qp:
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);

err_unmap_dmpt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);

err_unmap_mtt:
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);

err_unmap_eq:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);

err_unmap_cmpt:
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err_unmap_aux:
	unmap_flag = mlx4_UNMAP_ICM_AUX(dev);
	if (unmap_flag)
		pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n");

err_free_aux:
	/* The aux area is only freed when the unmap did not fail. */
	if
(!unmap_flag)
		mlx4_free_icm(dev, priv->fw.aux_icm, 0);

	return err;
}

/*
 * Clean up every ICM table, then unmap the auxiliary ICM area and free
 * it.  If the unmap command fails the area is not freed and a warning is
 * logged instead.
 */
static void mlx4_free_icms(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
	mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
	mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
	mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
	mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

	if (!mlx4_UNMAP_ICM_AUX(dev))
		mlx4_free_icm(dev, priv->fw.aux_icm, 0);
	else
		pr_warn("mlx4_core: mlx4_UNMAP_ICM_AUX failed.\n");
}

/*
 * Send MLX4_COMM_CMD_RESET over the comm channel while holding
 * slave_cmd_mutex; a failure is only logged.
 */
static void mlx4_slave_exit(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	mutex_lock(&priv->cmd.slave_cmd_mutex);
	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
		mlx4_warn(dev, "Failed to close slave function.\n");
	mutex_unlock(&priv->cmd.slave_cmd_mutex);
}

/*
 * Create a write-combining mapping of the part of PCI resource 2 that
 * follows the first num_uars pages and store it in priv->bf_mapping.
 *
 * Returns 0, -ENXIO when caps.bf_reg_size is 0, or -ENOMEM when the
 * mapping cannot be created.
 */
static int map_bf_area(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	resource_size_t bf_start;
	resource_size_t bf_len;
	int err = 0;

	if (!dev->caps.bf_reg_size)
		return -ENXIO;

	bf_start = pci_resource_start(dev->pdev, 2) +
			(dev->caps.num_uars << PAGE_SHIFT);
	bf_len = pci_resource_len(dev->pdev, 2) -
			(dev->caps.num_uars << PAGE_SHIFT);
	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
	if (!priv->bf_mapping)
		err = -ENOMEM;

	return err;
}

/* Release the mapping created by map_bf_area(), if there is one. */
static void unmap_bf_area(struct mlx4_dev *dev)
{
	if (mlx4_priv(dev)->bf_mapping)
		io_mapping_free(mlx4_priv(dev)->bf_mapping);
}

/*
 * Read the clock through priv->clock_mapping (see the function body for
 * the read sequence).
 */
int mlx4_read_clock(struct mlx4_dev
*dev)
{
	u32 clockhi, clocklo, clockhi1;
	cycle_t cycles;
	int i;
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (!priv->clock_mapping)
		return -ENOTSUPP;

	/*
	 * Read high, low, high again and retry (at most 10 times) until both
	 * high reads agree, so the two halves belong to the same value.
	 */
	for (i = 0; i < 10; i++) {
		clockhi = swab32(readl(priv->clock_mapping));
		clocklo = swab32(readl(priv->clock_mapping + 4));
		clockhi1 = swab32(readl(priv->clock_mapping));
		if (clockhi == clockhi1)
			break;
	}

	cycles = (u64) clockhi << 32 | (u64) clocklo;

	/*
	 * NOTE(review): the 64-bit value assembled above is returned through
	 * an int, the same channel used for -ENOTSUPP - confirm callers
	 * tolerate the narrowing.
	 */
	return cycles;
}
EXPORT_SYMBOL_GPL(mlx4_read_clock);

/*
 * ioremap MLX4_CLOCK_SIZE bytes at fw.clock_offset inside BAR
 * fw.clock_bar into priv->clock_mapping.  Returns 0 or -ENOMEM.
 */
static int map_internal_clock(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	priv->clock_mapping = ioremap(pci_resource_start(dev->pdev,
				priv->fw.clock_bar) +
				priv->fw.clock_offset, MLX4_CLOCK_SIZE);

	if (!priv->clock_mapping)
		return -ENOMEM;

	return 0;
}

/*
 * Report the BAR, offset and size of the internal clock region.
 * Returns -ENOTSUPP on a slave, -EINVAL when params is NULL, else 0.
 */
int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
				   struct mlx4_clock_params *params)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (mlx4_is_slave(dev))
		return -ENOTSUPP;
	if (!params)
		return -EINVAL;

	params->bar = priv->fw.clock_bar;
	params->offset = priv->fw.clock_offset;
	params->size = MLX4_CLOCK_SIZE;

	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);

/* Undo map_internal_clock() when a mapping exists. */
static void unmap_internal_clock(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (priv->clock_mapping)
		iounmap(priv->clock_mapping);
}

/*
 * Tear down the HCA: drop the clock and blue-flame mappings, then either
 * reset the slave channel (slave) or close the HCA, free the ICM tables
 * and unmap/free the firmware area (non-slave).
 */
static void mlx4_close_hca(struct mlx4_dev *dev)
{
	unmap_internal_clock(dev);
	unmap_bf_area(dev);
	if (mlx4_is_slave(dev)) {
		mlx4_slave_exit(dev);
	} else {
		mlx4_CLOSE_HCA(dev, 0);
		mlx4_free_icms(dev);

		/* The FW area is only freed when the unmap command succeeded. */
		if (!mlx4_UNMAP_FA(dev))
			mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
		else
			pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
	}
}

/*
 * Bring up the slave side of the comm channel: reset it, check the
 * interface revision and pass the VHCR DMA address to the master.
 * Runs with slave_cmd_mutex held; returns 0 or -EIO.
 */
static int mlx4_init_slave(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	u64 dma = (u64) priv->mfunc.vhcr_dma;
	int num_of_reset_retries = NUM_OF_RESET_RETRIES;
	int ret_from_reset = 0;
	u32 slave_read;
	u32 cmd_channel_ver;

	mutex_lock(&priv->cmd.slave_cmd_mutex);
	priv->cmd.max_cmds = 1;
	mlx4_warn(dev, "Sending reset\n");
	ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
				       MLX4_COMM_TIME);
/* if we are in the middle of flr the slave will try * NUM_OF_RESET_RETRIES times before leaving.*/ if (ret_from_reset) { if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) { msleep(SLEEP_TIME_IN_RESET); while (ret_from_reset && num_of_reset_retries) { mlx4_warn(dev, "slave is currently in the" "middle of FLR. retrying..." "(try num:%d)\n", (NUM_OF_RESET_RETRIES - num_of_reset_retries + 1)); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME); num_of_reset_retries = num_of_reset_retries - 1; } } else goto err; } /* check the driver version - the slave I/F revision * must match the master's */ slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); cmd_channel_ver = mlx4_comm_get_version(); if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) != MLX4_COMM_GET_IF_REV(slave_read)) { mlx4_err(dev, "slave driver version is not supported" " by the master\n"); goto err; } mlx4_warn(dev, "Sending vhcr0\n"); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) goto err; mutex_unlock(&priv->cmd.slave_cmd_mutex); return 0; err: mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev) { int i; for (i = 1; i <= dev->caps.num_ports; i++) { if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) dev->caps.gid_table_len[i] = mlx4_get_slave_num_gids(dev, 0); else dev->caps.gid_table_len[i] = 1; dev->caps.pkey_table_len[i] = dev->phys_caps.pkey_phys_table_len[i] - 1; } } static int choose_log_fs_mgm_entry_size(int qp_per_entry) { int i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE; i++) { if (qp_per_entry <= 4 * ((1 << i) / 16 - 2)) break; } return (i <= 
MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
}

/*
 * Select the steering mode and the operational MGM entry size.
 *
 * Device-managed flow steering is chosen when the module parameter
 * mlx4_log_num_mgm_entry_size is -1, the FS_EN capability is set, the
 * per-entry QP limit covers nvfs + 1 functions (multi-function only) and
 * a suitable entry size exists.  Otherwise B0 is used when both VEP
 * steering flags are set, and A0 when they are not.
 */
static void choose_steering_mode(struct mlx4_dev *dev,
				 struct mlx4_dev_cap *dev_cap)
{
	int nvfs;

	mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(dev->pdev), 0, &nvfs);
	/* high_rate_steer (non-mfunc only) masks out the VEP and FS flags. */
	if (high_rate_steer && !mlx4_is_mfunc(dev)) {
		dev->caps.flags &= ~(MLX4_DEV_CAP_FLAG_VEP_MC_STEER |
				     MLX4_DEV_CAP_FLAG_VEP_UC_STEER);
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_FS_EN;
	}

	if (mlx4_log_num_mgm_entry_size == -1 &&
	    dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
	    (!mlx4_is_mfunc(dev) ||
	     (dev_cap->fs_max_num_qp_per_entry >= (nvfs + 1))) &&
	    choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
		MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
		dev->oper_log_mgm_entry_size =
			choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
		dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
	} else {
		if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
		    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
			dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
		else {
			dev->caps.steering_mode = MLX4_STEERING_MODE_A0;

			/* Only one of the two flags is set: tell the user why B0 was skipped. */
			if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
			    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
				mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
					  "set to use B0 steering. Falling back to A0 steering mode.\n");
		}
		/* A positive module parameter overrides the default entry size. */
		dev->oper_log_mgm_entry_size =
			mlx4_log_num_mgm_entry_size > 0 ?
mlx4_log_num_mgm_entry_size :
			MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
		dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
	}
	mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
		 "log_num_mgm_entry_size = %d\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode),
		 dev->oper_log_mgm_entry_size, mlx4_log_num_mgm_entry_size);
}

/*
 * Initialise the HCA.
 *
 * Non-slave path: query and load the firmware, query device capabilities,
 * choose the steering mode, build the resource profile, set up the ICM
 * tables and issue INIT_HCA.  Slave path: initialise the comm channel and
 * fetch the capabilities from the master.  Both paths then map the
 * blue-flame area and query the adapter.
 *
 * Returns 0 on success or a negative errno after unwinding.
 */
static int mlx4_init_hca(struct mlx4_dev *dev)
{
	struct mlx4_priv	  *priv = mlx4_priv(dev);
	struct mlx4_dev_cap	   *dev_cap = NULL;
	struct mlx4_adapter	   adapter;
	struct mlx4_mod_stat_cfg   mlx4_cfg;
	struct mlx4_profile	   profile;
	struct mlx4_init_hca_param init_hca;
	u64 icm_size;
	int err;

	if (!mlx4_is_slave(dev)) {
		err = mlx4_QUERY_FW(dev);
		if (err) {
			if (err == -EACCES)
				mlx4_info(dev, "non-primary physical function, skipping.\n");
			else
				mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
			return err;
		}

		err = mlx4_load_fw(dev);
		if (err) {
			mlx4_err(dev, "Failed to start FW, aborting.\n");
			return err;
		}

		/* A failure to override log_pg_sz is only warned about. */
		mlx4_cfg.log_pg_sz_m = 1;
		mlx4_cfg.log_pg_sz = 0;
		err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
		if (err)
			mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");

		dev_cap = kzalloc(sizeof *dev_cap, GFP_KERNEL);
		if (!dev_cap) {
			mlx4_err(dev, "Failed to allocate memory for dev_cap\n");
			err = -ENOMEM;
			goto err_stop_fw;
		}

		err = mlx4_dev_cap(dev, dev_cap);
		if (err) {
			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
			goto err_stop_fw;
		}

		choose_steering_mode(dev, dev_cap);

		if (mlx4_is_master(dev))
			mlx4_parav_master_pf_caps(dev);

		process_mod_param_profile(&profile);
		if (dev->caps.steering_mode ==
		    MLX4_STEERING_MODE_DEVICE_MANAGED)
			profile.num_mcg = MLX4_FS_NUM_MCG;

		/* A negative value, seen through the signed cast, is an errno. */
		icm_size = mlx4_make_profile(dev, &profile, dev_cap,
					     &init_hca);
		if ((long long) icm_size < 0) {
			err = icm_size;
			goto err_stop_fw;
		}

		dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;

		init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
		init_hca.uar_page_sz = PAGE_SHIFT - 12;

		err = mlx4_init_icm(dev, dev_cap, &init_hca, icm_size);
		if (err)
			goto err_stop_fw;

		init_hca.mw_enable =
1; err = mlx4_INIT_HCA(dev, &init_hca); if (err) { mlx4_err(dev, "INIT_HCA command failed, aborting.\n"); goto err_free_icm; } /* * Read HCA frequency by QUERY_HCA command */ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { memset(&init_hca, 0, sizeof(init_hca)); err = mlx4_QUERY_HCA(dev, &init_hca); if (err) { mlx4_err(dev, "QUERY_HCA command failed, disable timestamp.\n"); dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; } else { dev->caps.hca_core_clock = init_hca.hca_core_clock; } /* In case we got HCA frequency 0 - disable timestamping * to avoid dividing by zero */ if (!dev->caps.hca_core_clock) { dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_err(dev, "HCA frequency is 0. Timestamping is not supported."); } else if (map_internal_clock(dev)) { /* Map internal clock, * in case of failure disable timestamping */ dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported.\n"); } } } else { err = mlx4_init_slave(dev); if (err) { mlx4_err(dev, "Failed to initialize slave\n"); return err; } err = mlx4_slave_cap(dev); if (err) { mlx4_err(dev, "Failed to obtain slave caps\n"); goto err_close; } } if (map_bf_area(dev)) mlx4_dbg(dev, "Failed to map blue flame area\n"); /* Only the master set the ports, all the rest got it from it.*/ if (!mlx4_is_slave(dev)) mlx4_set_port_mask(dev); err = mlx4_QUERY_ADAPTER(dev, &adapter); if (err) { mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n"); goto unmap_bf; } priv->eq_table.inta_pin = adapter.inta_pin; memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); memcpy(dev->vsd, adapter.vsd, sizeof(dev->vsd)); dev->vsd_vendor_id = adapter.vsd_vendor_id; if (!mlx4_is_slave(dev)) kfree(dev_cap); return 0; unmap_bf: if (!mlx4_is_slave(dev)) unmap_internal_clock(dev); unmap_bf_area(dev); if (mlx4_is_slave(dev)) { kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); } err_close: if (mlx4_is_slave(dev)) 
mlx4_slave_exit(dev);
	else
		mlx4_CLOSE_HCA(dev, 0);

err_free_icm:
	if (!mlx4_is_slave(dev))
		mlx4_free_icms(dev);

err_stop_fw:
	if (!mlx4_is_slave(dev)) {
		/* The FW area is only freed when the unmap command succeeded. */
		if (!mlx4_UNMAP_FA(dev))
			mlx4_free_icm(dev, priv->fw.fw_icm, 0);
		else
			pr_warn("mlx4_core: mlx4_UNMAP_FA failed.\n");
		kfree(dev_cap);
	}
	return err;
}

/*
 * Build the per-port (and, on a master, per-VF) lists of default counter
 * indices and initialise the counter bitmap.
 *
 * Returns -ENOENT when the device lacks MLX4_DEV_CAP_FLAG_COUNTERS, the
 * command error when enabling extended counters fails, or -ENOMEM when a
 * list node cannot be allocated.
 */
static int mlx4_init_counters_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int nent_pow2, port_indx, vf_index, num_counters;
	int res, index = 0;
	struct counter_index *new_counter_index;


	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
		return -ENOENT;

	/* Extended mode is only requested when both counter limits agree. */
	if (!mlx4_is_slave(dev) &&
	    dev->caps.max_counters == dev->caps.max_extended_counters) {
		res = mlx4_cmd(dev, MLX4_IF_STATE_EXTENDED, 0, 0,
			       MLX4_CMD_SET_IF_STAT,
			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
		if (res) {
			mlx4_err(dev, "Failed to set extended counters (err=%d)\n", res);
			return res;
		}
	}

	mutex_init(&priv->counters_table.mutex);

	/*
	 * A slave only records the default counter index of each port
	 * (0xFF means "none") and does not initialise a bitmap.
	 */
	if (mlx4_is_slave(dev)) {
		for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
			INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
			if (dev->caps.def_counter_index[port_indx] != 0xFF) {
				new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
				if (!new_counter_index)
					return -ENOMEM;
				new_counter_index->index = dev->caps.def_counter_index[port_indx];
				list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]);
			}
		}
		mlx4_dbg(dev, "%s: slave allocated %d counters for %d ports\n",
			 __func__, dev->caps.num_ports, dev->caps.num_ports);
		return 0;
	}

	nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);

	for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) {
		INIT_LIST_HEAD(&priv->counters_table.global_port_list[port_indx]);
		/* allocating 2 counters per port for PFs */
		/* For the PF, the ETH default counters are 0,2; */
		/* and the RoCE default counters are 1,3 */
		for (num_counters = 0; num_counters < 2; num_counters++, index++) {
			new_counter_index = kmalloc(sizeof(struct counter_index),
GFP_KERNEL); if (!new_counter_index) return -ENOMEM; new_counter_index->index = index; list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port_indx]); } } if (mlx4_is_master(dev)) { for (vf_index = 0; vf_index < dev->num_vfs; vf_index++) { for (port_indx = 0; port_indx < dev->caps.num_ports; port_indx++) { INIT_LIST_HEAD(&priv->counters_table.vf_list[vf_index][port_indx]); new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) return -ENOMEM; if (index < nent_pow2 - 2) { new_counter_index->index = index; index++; } else { new_counter_index->index = MLX4_SINK_COUNTER_INDEX; } list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[vf_index][port_indx]); } } res = mlx4_bitmap_init(&priv->counters_table.bitmap, nent_pow2, nent_pow2 - 1, index, 1); mlx4_dbg(dev, "%s: master allocated %d counters for %d VFs\n", __func__, index, dev->num_vfs); } else { res = mlx4_bitmap_init(&priv->counters_table.bitmap, nent_pow2, nent_pow2 - 1, index, 1); mlx4_dbg(dev, "%s: native allocated %d counters for %d ports\n", __func__, index, dev->caps.num_ports); } return 0; } static void mlx4_cleanup_counters_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int i, j; struct counter_index *port, *tmp_port; struct counter_index *vf, *tmp_vf; mutex_lock(&priv->counters_table.mutex); if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS) { for (i = 0; i < dev->caps.num_ports; i++) { list_for_each_entry_safe(port, tmp_port, &priv->counters_table.global_port_list[i], list) { list_del(&port->list); kfree(port); } } if (!mlx4_is_slave(dev)) { for (i = 0; i < dev->num_vfs; i++) { for (j = 0; j < dev->caps.num_ports; j++) { list_for_each_entry_safe(vf, tmp_vf, &priv->counters_table.vf_list[i][j], list) { /* clear the counter statistic */ if (__mlx4_clear_if_stat(dev, vf->index)) mlx4_dbg(dev, "%s: reset counter %d failed\n", __func__, vf->index); list_del(&vf->list); kfree(vf); } } } 
mlx4_bitmap_cleanup(&priv->counters_table.bitmap);
		}
	}

	mutex_unlock(&priv->counters_table.mutex);
}

/*
 * Release the non-default counters of one VF.
 *
 * For each port, every counter on the VF's list is reset.  The first list
 * entry and any entry holding the sink index are kept; all other entries
 * are returned to the bitmap and freed.  A slave number outside
 * 1..dev->num_vfs is ignored.  Always returns 0.
 */
int __mlx4_slave_counters_free(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int i, first;
	struct counter_index *vf, *tmp_vf;

	/* clean VF's counters for the next usage */
	if (slave > 0 && slave <= dev->num_vfs) {
		mlx4_dbg(dev, "%s: free counters of slave(%d)\n"
			 , __func__, slave);

		mutex_lock(&priv->counters_table.mutex);
		for (i = 0; i < dev->caps.num_ports; i++) {
			first = 0;
			list_for_each_entry_safe(vf, tmp_vf,
						 &priv->counters_table.vf_list[slave - 1][i],
						 list) {
				/* clear the counter statistic */
				if (__mlx4_clear_if_stat(dev, vf->index))
					mlx4_dbg(dev, "%s: reset counter %d failed\n",
						 __func__, vf->index);
				/* first++ is 0 (false) only for the head entry. */
				if (first++ && vf->index != MLX4_SINK_COUNTER_INDEX) {
					mlx4_dbg(dev, "%s: delete counter index %d for slave %d and port %d\n"
						 , __func__, vf->index, slave, i + 1);
					mlx4_bitmap_free(&priv->counters_table.bitmap, vf->index, MLX4_USE_RR);
					list_del(&vf->list);
					kfree(vf);
				} else {
					mlx4_dbg(dev, "%s: can't delete default counter index %d for slave %d and port %d\n"
						 , __func__, vf->index, slave, i + 1);
				}
			}
		}
		mutex_unlock(&priv->counters_table.mutex);
	}

	return 0;
}

/*
 * Allocate a counter index for (slave, port) and store it in *idx.
 *
 * port 0 yields MLX4_SINK_COUNTER_INDEX.  When the bitmap is exhausted
 * the first entry of the matching list is handed out instead.  A freshly
 * allocated index is appended to global_port_list[] (slave 0) or
 * vf_list[][] (other slaves).
 *
 * Returns 0, -ENOENT without counter support, -EINVAL for an out-of-range
 * slave or port, or -ENOMEM (with *idx set to the sink index).
 */
int __mlx4_counter_alloc(struct mlx4_dev *dev, int slave, int port, u32 *idx)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct counter_index *new_counter_index;

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
		return -ENOENT;

	if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) ||
	    (port < 0) || (port > MLX4_MAX_PORTS)) {
		mlx4_dbg(dev, "%s: invalid slave(%d) or port(%d) index\n",
			 __func__, slave, port);
		return -EINVAL;
	}

	/* handle old guest request does not support request by port index */
	if (port == 0) {
		*idx = MLX4_SINK_COUNTER_INDEX;
		mlx4_dbg(dev, "%s: allocated default counter index %d for slave %d port %d\n"
			 , __func__, *idx, slave, port);
		return 0;
	}

	mutex_lock(&priv->counters_table.mutex);

	*idx = mlx4_bitmap_alloc(&priv->counters_table.bitmap);
	/* if no resources return the default counter of the slave and port */
	if (*idx == -1) {
		if (slave == 0) { /* its the ethernet counter ?????? */
			new_counter_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
						       struct counter_index,
						       list);
		} else {
			new_counter_index = list_entry(priv->counters_table.vf_list[slave - 1][port - 1].next,
						       struct counter_index,
						       list);
		}

		*idx = new_counter_index->index;
		mlx4_dbg(dev, "%s: allocated defualt counter index %d for slave %d port %d\n"
			 , __func__, *idx, slave, port);
		goto out;
	}

	if (slave == 0) { /* native or master */
		new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
		if (!new_counter_index)
			goto no_mem;
		new_counter_index->index = *idx;
		list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]);
	} else {
		new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL);
		if (!new_counter_index)
			goto no_mem;
		new_counter_index->index = *idx;
		list_add_tail(&new_counter_index->list, &priv->counters_table.vf_list[slave - 1][port - 1]);
	}

	mlx4_dbg(dev, "%s: allocated counter index %d for slave %d port %d\n"
		 , __func__, *idx, slave, port);
out:
	mutex_unlock(&priv->counters_table.mutex);
	return 0;

no_mem:
	/* Give the index back before reporting the allocation failure. */
	mlx4_bitmap_free(&priv->counters_table.bitmap, *idx, MLX4_USE_RR);
	mutex_unlock(&priv->counters_table.mutex);
	*idx = MLX4_SINK_COUNTER_INDEX;
	mlx4_dbg(dev, "%s: failed err (%d)\n"
		 , __func__, -ENOMEM);
	return -ENOMEM;
}

/*
 * Allocate a counter for a port.
 *
 * On a multi-function device the request goes through the wrapped
 * MLX4_CMD_ALLOC_RES command; otherwise __mlx4_counter_alloc() is called
 * directly with slave 0.
 */
int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx)
{
	u64 out_param;
	int err;
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct counter_index *new_counter_index, *c_index;

	if (mlx4_is_mfunc(dev)) {
		err = mlx4_cmd_imm(dev, 0, &out_param,
				   ((u32) port) << 8 | (u32) RES_COUNTER,
				   RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
		if (!err) {
			*idx = get_param_l(&out_param);
			/* Receiving the sink index is reported as "no space". */
			if (*idx == MLX4_SINK_COUNTER_INDEX)
				return -ENOSPC;

			mutex_lock(&priv->counters_table.mutex);
			c_index = list_entry(priv->counters_table.global_port_list[port - 1].next,
struct counter_index, list); mutex_unlock(&priv->counters_table.mutex); if (c_index->index == *idx) return -EEXIST; if (mlx4_is_slave(dev)) { new_counter_index = kmalloc(sizeof(struct counter_index), GFP_KERNEL); if (!new_counter_index) { mlx4_counter_free(dev, port, *idx); return -ENOMEM; } new_counter_index->index = *idx; mutex_lock(&priv->counters_table.mutex); list_add_tail(&new_counter_index->list, &priv->counters_table.global_port_list[port - 1]); mutex_unlock(&priv->counters_table.mutex); mlx4_dbg(dev, "%s: allocated counter index %d for port %d\n" , __func__, *idx, port); } } return err; } return __mlx4_counter_alloc(dev, 0, port, idx); } EXPORT_SYMBOL_GPL(mlx4_counter_alloc); void __mlx4_counter_free(struct mlx4_dev *dev, int slave, int port, u32 idx) { /* check if native or slave and deletes acordingly */ struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *pf, *tmp_pf; struct counter_index *vf, *tmp_vf; int first; if (idx == MLX4_SINK_COUNTER_INDEX) { mlx4_dbg(dev, "%s: try to delete default counter index %d for port %d\n" , __func__, idx, port); return; } if ((slave > MLX4_MAX_NUM_VF) || (slave < 0) || (port < 0) || (port > MLX4_MAX_PORTS)) { mlx4_warn(dev, "%s: deletion failed due to invalid slave(%d) or port(%d) index\n" , __func__, slave, idx); return; } mutex_lock(&priv->counters_table.mutex); if (slave == 0) { first = 0; list_for_each_entry_safe(pf, tmp_pf, &priv->counters_table.global_port_list[port - 1], list) { /* the first 2 counters are reserved */ if (pf->index == idx) { /* clear the counter statistic */ if (__mlx4_clear_if_stat(dev, pf->index)) mlx4_dbg(dev, "%s: reset counter %d failed\n", __func__, pf->index); if (1 < first && idx != MLX4_SINK_COUNTER_INDEX) { list_del(&pf->list); kfree(pf); mlx4_dbg(dev, "%s: delete counter index %d for native device (%d) port %d\n" , __func__, idx, slave, port); mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); goto out; } else { mlx4_dbg(dev, "%s: can't delete default counter 
index %d for native device (%d) port %d\n" , __func__, idx, slave, port); goto out; } } first++; } mlx4_dbg(dev, "%s: can't delete counter index %d for native device (%d) port %d\n" , __func__, idx, slave, port); } else { first = 0; list_for_each_entry_safe(vf, tmp_vf, &priv->counters_table.vf_list[slave - 1][port - 1], list) { /* the first element is reserved */ if (vf->index == idx) { /* clear the counter statistic */ if (__mlx4_clear_if_stat(dev, vf->index)) mlx4_dbg(dev, "%s: reset counter %d failed\n", __func__, vf->index); if (first) { list_del(&vf->list); kfree(vf); mlx4_dbg(dev, "%s: delete counter index %d for slave %d port %d\n", __func__, idx, slave, port); mlx4_bitmap_free(&priv->counters_table.bitmap, idx, MLX4_USE_RR); goto out; } else { mlx4_dbg(dev, "%s: can't delete default slave (%d) counter index %d for port %d\n" , __func__, slave, idx, port); goto out; } } first++; } mlx4_dbg(dev, "%s: can't delete slave (%d) counter index %d for port %d\n" , __func__, slave, idx, port); } out: mutex_unlock(&priv->counters_table.mutex); } void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx) { u64 in_param = 0; struct mlx4_priv *priv = mlx4_priv(dev); struct counter_index *counter, *tmp_counter; int first = 0; if (mlx4_is_mfunc(dev)) { set_param_l(&in_param, idx); mlx4_cmd(dev, in_param, ((u32) port) << 8 | (u32) RES_COUNTER, RES_OP_RESERVE, MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (mlx4_is_slave(dev) && idx != MLX4_SINK_COUNTER_INDEX) { mutex_lock(&priv->counters_table.mutex); list_for_each_entry_safe(counter, tmp_counter, &priv->counters_table.global_port_list[port - 1], list) { if (counter->index == idx && first++) { list_del(&counter->list); kfree(counter); mlx4_dbg(dev, "%s: delete counter index %d for port %d\n" , __func__, idx, port); mutex_unlock(&priv->counters_table.mutex); return; } } mutex_unlock(&priv->counters_table.mutex); } return; } __mlx4_counter_free(dev, 0, port, idx); } EXPORT_SYMBOL_GPL(mlx4_counter_free); 
/*
 * Issue QUERY_IF_STAT for @counter_index with bit 31 of the input
 * modifier set (the callers in this file use this to reset a counter).
 *
 * Returns -EINVAL for the sink counter, 0 without issuing any command
 * when running as a slave, the mailbox allocation error if no mailbox
 * could be obtained, and otherwise the result of the firmware command.
 */
int __mlx4_clear_if_stat(struct mlx4_dev *dev, u8 counter_index)
{
	struct mlx4_cmd_mailbox *if_stat_mailbox = NULL;
	int err = 0;
	/* 1U: shifting a signed 1 into bit 31 is undefined behaviour */
	u32 if_stat_in_mod = (counter_index & 0xff) | (1U << 31);

	if (counter_index == MLX4_SINK_COUNTER_INDEX)
		return -EINVAL;

	if (mlx4_is_slave(dev))
		return 0;

	if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(if_stat_mailbox)) {
		err = PTR_ERR(if_stat_mailbox);
		return err;
	}

	err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
			   MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
			   MLX4_CMD_NATIVE);

	mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
	return err;
}
list_for_each_entry_safe(vport, tmp_vport, &priv->counters_table.global_port_list[port - 1], list) { if (vport->index == MLX4_SINK_COUNTER_INDEX) continue; memset(if_stat_mailbox->buf, 0, sizeof(union mlx4_counter)); if_stat_in_mod = (vport->index & 0xff) | ((reset & 1) << 31); err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0, MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (err) { mlx4_dbg(dev, "%s: failed to read statistics for counter index %d\n", __func__, vport->index); goto if_stat_out; } counter = (union mlx4_counter *)if_stat_mailbox->buf; if ((counter->control.cnt_mode & 0xf) == 1) { vport_stats->rx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastFrames); vport_stats->rx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxUnicastFrames); vport_stats->rx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfRxMulticastFrames); vport_stats->tx_broadcast_packets += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastFrames); vport_stats->tx_unicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxUnicastFrames); vport_stats->tx_multicast_packets += be64_to_cpu(counter->ext.counters[0].IfTxMulticastFrames); vport_stats->rx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxBroadcastOctets); vport_stats->rx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxUnicastOctets); vport_stats->rx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfRxMulticastOctets); vport_stats->tx_broadcast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxBroadcastOctets); vport_stats->tx_unicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxUnicastOctets); vport_stats->tx_multicast_bytes += be64_to_cpu(counter->ext.counters[0].IfTxMulticastOctets); vport_stats->rx_errors += be64_to_cpu(counter->ext.counters[0].IfRxErrorFrames); vport_stats->rx_dropped += be64_to_cpu(counter->ext.counters[0].IfRxNoBufferFrames); vport_stats->tx_errors += 
be64_to_cpu(counter->ext.counters[0].IfTxDroppedFrames); } } if_stat_out: mutex_unlock(&priv->counters_table.mutex); mlx4_free_cmd_mailbox(dev, if_stat_mailbox); return err; } EXPORT_SYMBOL_GPL(mlx4_get_vport_ethtool_stats); static int mlx4_setup_hca(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; int port; __be32 ib_port_default_caps; err = mlx4_init_uar_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "user access region table (err=%d), aborting.\n", err); return err; } err = mlx4_uar_alloc(dev, &priv->driver_uar); if (err) { mlx4_err(dev, "Failed to allocate driver access region " "(err=%d), aborting.\n", err); goto err_uar_table_free; } priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!priv->kar) { mlx4_err(dev, "Couldn't map kernel access region, " "aborting.\n"); err = -ENOMEM; goto err_uar_free; } err = mlx4_init_pd_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "protection domain table (err=%d), aborting.\n", err); goto err_kar_unmap; } err = mlx4_init_xrcd_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "reliable connection domain table (err=%d), " "aborting.\n", err); goto err_pd_table_free; } err = mlx4_init_mr_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "memory region table (err=%d), aborting.\n", err); goto err_xrcd_table_free; } if (!mlx4_is_slave(dev)) { err = mlx4_init_mcg_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "multicast group table (err=%d), aborting.\n", err); goto err_mr_table_free; } } err = mlx4_init_eq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "event queue table (err=%d), aborting.\n", err); goto err_mcg_table_free; } err = mlx4_cmd_use_events(dev); if (err) { mlx4_err(dev, "Failed to switch to event-driven " "firmware commands (err=%d), aborting.\n", err); goto err_eq_table_free; } err = mlx4_NOP(dev); if (err) { if (dev->flags & MLX4_FLAG_MSI_X) { mlx4_warn(dev, "NOP command failed 
to generate MSI-X " "interrupt IRQ %d).\n", priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_warn(dev, "Trying again without MSI-X.\n"); } else { mlx4_err(dev, "NOP command failed to generate interrupt " "(IRQ %d), aborting.\n", priv->eq_table.eq[dev->caps.num_comp_vectors].irq); mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n"); } goto err_cmd_poll; } mlx4_dbg(dev, "NOP command IRQ test passed\n"); err = mlx4_init_cq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "completion queue table (err=%d), aborting.\n", err); goto err_cmd_poll; } err = mlx4_init_srq_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "shared receive queue table (err=%d), aborting.\n", err); goto err_cq_table_free; } err = mlx4_init_qp_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "queue pair table (err=%d), aborting.\n", err); goto err_srq_table_free; } err = mlx4_init_counters_table(dev); if (err && err != -ENOENT) { mlx4_err(dev, "Failed to initialize counters table (err=%d), " "aborting.\n", err); goto err_qp_table_free; } if (!mlx4_is_slave(dev)) { for (port = 1; port <= dev->caps.num_ports; port++) { ib_port_default_caps = 0; err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps); if (err) mlx4_warn(dev, "failed to get port %d default " "ib capabilities (%d). Continuing " "with caps = 0\n", port, err); dev->caps.ib_port_def_cap[port] = ib_port_default_caps; /* initialize per-slave default ib port capabilities */ if (mlx4_is_master(dev)) { int i; for (i = 0; i < dev->num_slaves; i++) { if (i == mlx4_master_func_num(dev)) continue; priv->mfunc.master.slave_state[i].ib_cap_mask[port] = ib_port_default_caps; } } dev->caps.port_ib_mtu[port] = IB_MTU_4096; err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ? 
dev->caps.pkey_table_len[port] : -1); if (err) { mlx4_err(dev, "Failed to set port %d (err=%d), " "aborting\n", port, err); goto err_counters_table_free; } } } return 0; err_counters_table_free: mlx4_cleanup_counters_table(dev); err_qp_table_free: mlx4_cleanup_qp_table(dev); err_srq_table_free: mlx4_cleanup_srq_table(dev); err_cq_table_free: mlx4_cleanup_cq_table(dev); err_cmd_poll: mlx4_cmd_use_polling(dev); err_eq_table_free: mlx4_cleanup_eq_table(dev); err_mcg_table_free: if (!mlx4_is_slave(dev)) mlx4_cleanup_mcg_table(dev); err_mr_table_free: mlx4_cleanup_mr_table(dev); err_xrcd_table_free: mlx4_cleanup_xrcd_table(dev); err_pd_table_free: mlx4_cleanup_pd_table(dev); err_kar_unmap: iounmap(priv->kar); err_uar_free: mlx4_uar_free(dev, &priv->driver_uar); err_uar_table_free: mlx4_cleanup_uar_table(dev); return err; } static void mlx4_enable_msi_x(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; int nreq = min_t(int, dev->caps.num_ports * min_t(int, num_possible_cpus() + 1, MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX); int err; int i; if (msi_x) { nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); if (msi_x > 1 && !mlx4_is_mfunc(dev)) nreq = min_t(int, nreq, msi_x); entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); if (!entries) goto no_msi; for (i = 0; i < nreq; ++i) entries[i].entry = i; retry: err = pci_enable_msix(dev->pdev, entries, nreq); if (err) { /* Try again if at least 2 vectors are available */ if (err > 1) { mlx4_info(dev, "Requested %d vectors, " "but only %d MSI-X vectors available, " "trying again\n", nreq, err); nreq = err; goto retry; } kfree(entries); + /* if error, or can't alloc even 1 IRQ */ + if (err < 0) { + mlx4_err(dev, "No IRQs left, device can't " + "be started.\n"); + goto no_irq; + } goto no_msi; } if (nreq < MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) { /*Working in legacy mode , all EQ's shared*/ dev->caps.comp_pool = 0; dev->caps.num_comp_vectors = nreq - 
1; } else { dev->caps.comp_pool = nreq - MSIX_LEGACY_SZ; dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1; } for (i = 0; i < nreq; ++i) priv->eq_table.eq[i].irq = entries[i].vector; dev->flags |= MLX4_FLAG_MSI_X; kfree(entries); return; } no_msi: dev->caps.num_comp_vectors = 1; dev->caps.comp_pool = 0; for (i = 0; i < 2; ++i) priv->eq_table.eq[i].irq = dev->pdev->irq; + return; +no_irq: + dev->caps.num_comp_vectors = 0; + dev->caps.comp_pool = 0; } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) { struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; int err = 0; info->dev = dev; info->port = port; if (!mlx4_is_slave(dev)) { mlx4_init_mac_table(dev, &info->mac_table); mlx4_init_vlan_table(dev, &info->vlan_table); info->base_qpn = mlx4_get_base_qpn(dev, port); } sprintf(info->dev_name, "mlx4_port%d", port); info->port_attr.attr.name = info->dev_name; if (mlx4_is_mfunc(dev)) info->port_attr.attr.mode = S_IRUGO; else { info->port_attr.attr.mode = S_IRUGO | S_IWUSR; info->port_attr.store = set_port_type; } info->port_attr.show = show_port_type; sysfs_attr_init(&info->port_attr.attr); err = device_create_file(&dev->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); info->port = -1; } sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); info->port_mtu_attr.attr.name = info->dev_mtu_name; if (mlx4_is_mfunc(dev)) info->port_mtu_attr.attr.mode = S_IRUGO; else { info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR; info->port_mtu_attr.store = set_port_ib_mtu; } info->port_mtu_attr.show = show_port_ib_mtu; sysfs_attr_init(&info->port_mtu_attr.attr); err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); if (err) { mlx4_err(dev, "Failed to create mtu file for port %d\n", port); device_remove_file(&info->dev->pdev->dev, &info->port_attr); info->port = -1; } return err; } static void mlx4_cleanup_port_info(struct mlx4_port_info *info) { if (info->port < 0) return; 
device_remove_file(&info->dev->pdev->dev, &info->port_attr); device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); } static int mlx4_init_steering(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int num_entries = dev->caps.num_ports; int i, j; priv->steer = kzalloc(sizeof(struct mlx4_steer) * num_entries, GFP_KERNEL); if (!priv->steer) return -ENOMEM; for (i = 0; i < num_entries; i++) for (j = 0; j < MLX4_NUM_STEERS; j++) { INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]); INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]); } return 0; } static void mlx4_clear_steering(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_steer_index *entry, *tmp_entry; struct mlx4_promisc_qp *pqp, *tmp_pqp; int num_entries = dev->caps.num_ports; int i, j; for (i = 0; i < num_entries; i++) { for (j = 0; j < MLX4_NUM_STEERS; j++) { list_for_each_entry_safe(pqp, tmp_pqp, &priv->steer[i].promisc_qps[j], list) { list_del(&pqp->list); kfree(pqp); } list_for_each_entry_safe(entry, tmp_entry, &priv->steer[i].steer_entries[j], list) { list_del(&entry->list); list_for_each_entry_safe(pqp, tmp_pqp, &entry->duplicates, list) { list_del(&pqp->list); kfree(pqp); } kfree(entry); } } } kfree(priv->steer); } static int extended_func_num(struct pci_dev *pdev) { return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn); } #define MLX4_OWNER_BASE 0x8069c #define MLX4_OWNER_SIZE 4 static int mlx4_get_ownership(struct mlx4_dev *dev) { void __iomem *owner; u32 ret; if (pci_channel_offline(dev->pdev)) return -EIO; owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); return -ENOMEM; } ret = readl(owner); iounmap(owner); return (int) !!ret; } static void mlx4_free_ownership(struct mlx4_dev *dev) { void __iomem *owner; if (pci_channel_offline(dev->pdev)) return; owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { 
mlx4_err(dev, "Failed to obtain ownership bit\n"); return; } writel(0, owner); msleep(1000); iounmap(owner); } static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) { struct mlx4_priv *priv; struct mlx4_dev *dev; int err; int port; int nvfs, prb_vf; pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev)); err = pci_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Cannot enable PCI device, " "aborting.\n"); return err; } mlx4_get_val(num_vfs.dbdf2val.tbl, pci_physfn(pdev), 0, &nvfs); mlx4_get_val(probe_vf.dbdf2val.tbl, pci_physfn(pdev), 0, &prb_vf); if (nvfs > MLX4_MAX_NUM_VF) { dev_err(&pdev->dev, "There are more VF's (%d) than allowed(%d)\n", nvfs, MLX4_MAX_NUM_VF); return -EINVAL; } if (nvfs < 0) { dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n"); return -EINVAL; } /* * Check for BARs. */ if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing DCS, aborting." "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%x)\n", pci_dev_data, pci_resource_flags(pdev, 0)); err = -ENODEV; goto err_disable_pdev; } if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing UAR, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } err = pci_request_regions(pdev, DRV_NAME); if (err) { dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); goto err_disable_pdev; } pci_set_master(pdev); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); goto err_release_regions; } } err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit " "consistent PCI DMA mask.\n"); err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " 
"aborting.\n"); goto err_release_regions; } } /* Allow large DMA segments, up to the firmware limit of 1 GB */ dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); priv = kzalloc(sizeof *priv, GFP_KERNEL); if (!priv) { dev_err(&pdev->dev, "Device struct alloc failed, " "aborting.\n"); err = -ENOMEM; goto err_release_regions; } dev = &priv->dev; dev->pdev = pdev; INIT_LIST_HEAD(&priv->dev_list); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); mutex_init(&priv->port_mutex); INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); INIT_LIST_HEAD(&priv->bf_list); mutex_init(&priv->bf_mutex); dev->rev_id = pdev->revision; dev->numa_node = dev_to_node(&pdev->dev); /* Detect if this device is a virtual function */ if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { /* When acting as pf, we normally skip vfs unless explicitly * requested to probe them. */ if (nvfs && extended_func_num(pdev) > prb_vf) { mlx4_warn(dev, "Skipping virtual function:%d\n", extended_func_num(pdev)); err = -ENODEV; goto err_free_dev; } mlx4_warn(dev, "Detected virtual function - running in slave mode\n"); dev->flags |= MLX4_FLAG_SLAVE; } else { /* We reset the device and enable SRIOV only for physical * devices. Try to claim ownership on the device; * if already taken, skip -- do not allow multiple PFs */ err = mlx4_get_ownership(dev); if (err) { if (err < 0) goto err_free_dev; else { mlx4_warn(dev, "Multiple PFs not yet supported." 
" Skipping PF.\n"); err = -EINVAL; goto err_free_dev; } } if (nvfs) { mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", nvfs); err = pci_enable_sriov(pdev, nvfs); if (err) { mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", err); err = 0; } else { mlx4_warn(dev, "Running in master mode\n"); dev->flags |= MLX4_FLAG_SRIOV | MLX4_FLAG_MASTER; dev->num_vfs = nvfs; } } atomic_set(&priv->opreq_count, 0); INIT_WORK(&priv->opreq_task, mlx4_opreq_action); /* * Now reset the HCA before we touch the PCI capabilities or * attempt a firmware command, since a boot ROM may have left * the HCA in an undefined state. */ err = mlx4_reset(dev); if (err) { mlx4_err(dev, "Failed to reset HCA, aborting.\n"); goto err_sriov; } } slave_start: err = mlx4_cmd_init(dev); if (err) { mlx4_err(dev, "Failed to init command interface, aborting.\n"); goto err_sriov; } /* In slave functions, the communication channel must be initialized * before posting commands. Also, init num_slaves before calling * mlx4_init_hca */ if (mlx4_is_mfunc(dev)) { if (mlx4_is_master(dev)) dev->num_slaves = MLX4_MAX_NUM_SLAVES; else { dev->num_slaves = 0; err = mlx4_multi_func_init(dev); if (err) { mlx4_err(dev, "Failed to init slave mfunc" " interface, aborting.\n"); goto err_cmd; } } } err = mlx4_init_hca(dev); if (err) { if (err == -EACCES) { /* Not primary Physical function * Running in slave mode */ mlx4_cmd_cleanup(dev); dev->flags |= MLX4_FLAG_SLAVE; dev->flags &= ~MLX4_FLAG_MASTER; goto slave_start; } else goto err_mfunc; } /* In master functions, the communication channel must be initialized * after obtaining its address from fw */ if (mlx4_is_master(dev)) { err = mlx4_multi_func_init(dev); if (err) { mlx4_err(dev, "Failed to init master mfunc" "interface, aborting.\n"); goto err_close; } } err = mlx4_alloc_eq_table(dev); if (err) goto err_master_mfunc; priv->msix_ctl.pool_bm = 0; mutex_init(&priv->msix_ctl.pool_lock); mlx4_enable_msi_x(dev); + + /* no MSIX and no shared IRQ */ + 
if (!dev->caps.num_comp_vectors && !dev->caps.comp_pool) { + err = -ENOSPC; + goto err_free_eq; + } + if ((mlx4_is_mfunc(dev)) && !(dev->flags & MLX4_FLAG_MSI_X)) { err = -ENOSYS; mlx4_err(dev, "INTx is not supported in multi-function mode." " aborting.\n"); goto err_free_eq; } if (!mlx4_is_slave(dev)) { err = mlx4_init_steering(dev); if (err) goto err_free_eq; } err = mlx4_setup_hca(dev); if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) && !mlx4_is_mfunc(dev)) { dev->flags &= ~MLX4_FLAG_MSI_X; dev->caps.num_comp_vectors = 1; dev->caps.comp_pool = 0; pci_disable_msix(pdev); err = mlx4_setup_hca(dev); } if (err) goto err_steer; mlx4_init_quotas(dev); for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); if (err) goto err_port; } err = mlx4_register_device(dev); if (err) goto err_port; mlx4_request_modules(dev); mlx4_sense_init(dev); mlx4_start_sense(dev); priv->pci_dev_data = pci_dev_data; pci_set_drvdata(pdev, dev); return 0; err_port: for (--port; port >= 1; --port) mlx4_cleanup_port_info(&priv->port[port]); mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mcg_table(dev); mlx4_cleanup_mr_table(dev); mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); mlx4_cleanup_uar_table(dev); err_steer: if (!mlx4_is_slave(dev)) mlx4_clear_steering(dev); err_free_eq: mlx4_free_eq_table(dev); err_master_mfunc: if (mlx4_is_master(dev)) { mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); mlx4_multi_func_cleanup(dev); } if (mlx4_is_slave(dev)) { kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); } err_close: if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); mlx4_close_hca(dev); err_mfunc: if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); err_cmd: mlx4_cmd_cleanup(dev); err_sriov: if (dev->flags & MLX4_FLAG_SRIOV) 
pci_disable_sriov(pdev); if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); err_free_dev: kfree(priv); err_release_regions: pci_release_regions(pdev); err_disable_pdev: pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); return err; } static int __devinit mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { printk_once(KERN_INFO "%s", mlx4_version); return __mlx4_init_one(pdev, id->driver_data); } static void mlx4_remove_one(struct pci_dev *pdev) { struct mlx4_dev *dev = pci_get_drvdata(pdev); struct mlx4_priv *priv = mlx4_priv(dev); int p; if (dev) { /* in SRIOV it is not allowed to unload the pf's * driver while there are alive vf's */ if (mlx4_is_master(dev)) { if (mlx4_how_many_lives_vf(dev)) mlx4_err(dev, "Removing PF when there are assigned VF's !!!\n"); } mlx4_stop_sense(dev); mlx4_unregister_device(dev); for (p = 1; p <= dev->caps.num_ports; p++) { mlx4_cleanup_port_info(&priv->port[p]); mlx4_CLOSE_PORT(dev, p); } if (mlx4_is_master(dev)) mlx4_free_resource_tracker(dev, RES_TR_FREE_SLAVES_ONLY); mlx4_cleanup_counters_table(dev); mlx4_cleanup_qp_table(dev); mlx4_cleanup_srq_table(dev); mlx4_cleanup_cq_table(dev); mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mcg_table(dev); mlx4_cleanup_mr_table(dev); mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); if (mlx4_is_master(dev)) mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY); iounmap(priv->kar); mlx4_uar_free(dev, &priv->driver_uar); mlx4_cleanup_uar_table(dev); if (!mlx4_is_slave(dev)) mlx4_clear_steering(dev); mlx4_free_eq_table(dev); if (mlx4_is_master(dev)) mlx4_multi_func_cleanup(dev); mlx4_close_hca(dev); if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); mlx4_cmd_cleanup(dev); if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); if (dev->flags & MLX4_FLAG_SRIOV) { mlx4_warn(dev, "Disabling SR-IOV\n"); pci_disable_sriov(pdev); } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); kfree(dev->caps.qp0_tunnel); kfree(dev->caps.qp0_proxy); 
kfree(dev->caps.qp1_tunnel); kfree(dev->caps.qp1_proxy); kfree(priv); pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } } static int restore_current_port_types(struct mlx4_dev *dev, enum mlx4_port_type *types, enum mlx4_port_type *poss_types) { struct mlx4_priv *priv = mlx4_priv(dev); int err, i; mlx4_stop_sense(dev); mutex_lock(&priv->port_mutex); for (i = 0; i < dev->caps.num_ports; i++) dev->caps.possible_type[i + 1] = poss_types[i]; err = mlx4_change_port_types(dev, types); mlx4_start_sense(dev); mutex_unlock(&priv->port_mutex); return err; } int mlx4_restart_one(struct pci_dev *pdev) { struct mlx4_dev *dev = pci_get_drvdata(pdev); struct mlx4_priv *priv = mlx4_priv(dev); enum mlx4_port_type curr_type[MLX4_MAX_PORTS]; enum mlx4_port_type poss_type[MLX4_MAX_PORTS]; int pci_dev_data, err, i; pci_dev_data = priv->pci_dev_data; for (i = 0; i < dev->caps.num_ports; i++) { curr_type[i] = dev->caps.port_type[i + 1]; poss_type[i] = dev->caps.possible_type[i + 1]; } mlx4_remove_one(pdev); err = __mlx4_init_one(pdev, pci_dev_data); if (err) return err; dev = pci_get_drvdata(pdev); err = restore_current_port_types(dev, curr_type, poss_type); if (err) mlx4_err(dev, "mlx4_restart_one: could not restore original port types (%d)\n", err); return 0; } static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { /* MT25408 "Hermon" SDR */ { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR */ { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR */ { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE */ { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 
0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */ { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26468 ConnectX EN 10GigE PCIe gen2*/ { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26478 ConnectX2 40GigE PCIe gen2 */ { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25400 Family [ConnectX-2 Virtual Function] */ { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF }, /* MT27500 Family [ConnectX-3] */ { PCI_VDEVICE(MELLANOX, 0x1003), 0 }, /* MT27500 Family [ConnectX-3 Virtual Function] */ { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF }, { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */ { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */ { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */ { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */ { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */ { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */ { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */ { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */ { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */ { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */ { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */ { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */ { 0, } }; MODULE_DEVICE_TABLE(pci, mlx4_pci_table); static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { mlx4_remove_one(pdev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; } static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { int ret = __mlx4_init_one(pdev, 0); return ret ? 
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } static const struct pci_error_handlers mlx4_err_handler = { .error_detected = mlx4_pci_err_detected, .slot_reset = mlx4_pci_slot_reset, }; static int suspend(struct pci_dev *pdev, pm_message_t state) { mlx4_remove_one(pdev); return 0; } static int resume(struct pci_dev *pdev) { return __mlx4_init_one(pdev, 0); } static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = mlx4_pci_table, .probe = mlx4_init_one, .remove = __devexit_p(mlx4_remove_one), .suspend = suspend, .resume = resume, .err_handler = &mlx4_err_handler, }; static int __init mlx4_verify_params(void) { int status; status = update_defaults(&port_type_array); if (status == INVALID_STR) { if (mlx4_fill_dbdf2val_tbl(&port_type_array.dbdf2val)) return -1; } else if (status == INVALID_DATA) { return -1; } status = update_defaults(&num_vfs); if (status == INVALID_STR) { if (mlx4_fill_dbdf2val_tbl(&num_vfs.dbdf2val)) return -1; } else if (status == INVALID_DATA) { return -1; } status = update_defaults(&probe_vf); if (status == INVALID_STR) { if (mlx4_fill_dbdf2val_tbl(&probe_vf.dbdf2val)) return -1; } else if (status == INVALID_DATA) { return -1; } if (msi_x < 0) { pr_warn("mlx4_core: bad msi_x: %d\n", msi_x); return -1; } if ((log_num_mac < 0) || (log_num_mac > 7)) { pr_warning("mlx4_core: bad num_mac: %d\n", log_num_mac); return -1; } if (log_num_vlan != 0) pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n", MLX4_LOG_NUM_VLANS); if (mlx4_set_4k_mtu != -1) pr_warning("mlx4_core: set_4k_mtu - obsolete module param\n"); if ((log_mtts_per_seg < 0) || (log_mtts_per_seg > 7)) { pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg); return -1; } if (mlx4_log_num_mgm_entry_size != -1 && (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not " "in legal range (-1 or %d..%d)\n", 
mlx4_log_num_mgm_entry_size, MLX4_MIN_MGM_LOG_ENTRY_SIZE, MLX4_MAX_MGM_LOG_ENTRY_SIZE); return -1; } if (mod_param_profile.num_qp < 18 || mod_param_profile.num_qp > 23) { pr_warning("mlx4_core: bad log_num_qp: %d\n", mod_param_profile.num_qp); return -1; } if (mod_param_profile.num_srq < 10) { pr_warning("mlx4_core: too low log_num_srq: %d\n", mod_param_profile.num_srq); return -1; } if (mod_param_profile.num_cq < 10) { pr_warning("mlx4_core: too low log_num_cq: %d\n", mod_param_profile.num_cq); return -1; } if (mod_param_profile.num_mpt < 10) { pr_warning("mlx4_core: too low log_num_mpt: %d\n", mod_param_profile.num_mpt); return -1; } if (mod_param_profile.num_mtt_segs && mod_param_profile.num_mtt_segs < 15) { pr_warning("mlx4_core: too low log_num_mtt: %d\n", mod_param_profile.num_mtt_segs); return -1; } if (mod_param_profile.num_mtt_segs > MLX4_MAX_LOG_NUM_MTT) { pr_warning("mlx4_core: too high log_num_mtt: %d\n", mod_param_profile.num_mtt_segs); return -1; } return 0; } static int __init mlx4_init(void) { int ret; if (mlx4_verify_params()) return -EINVAL; mlx4_catas_init(); mlx4_wq = create_singlethread_workqueue("mlx4"); if (!mlx4_wq) return -ENOMEM; if (enable_sys_tune) sys_tune_init(); ret = pci_register_driver(&mlx4_driver); if (ret < 0) goto err; return 0; err: if (enable_sys_tune) sys_tune_fini(); destroy_workqueue(mlx4_wq); return ret; } static void __exit mlx4_cleanup(void) { if (enable_sys_tune) sys_tune_fini(); pci_unregister_driver(&mlx4_driver); destroy_workqueue(mlx4_wq); } module_init_order(mlx4_init, SI_ORDER_MIDDLE); module_exit(mlx4_cleanup); #include static int mlx4_evhand(module_t mod, int event, void *arg) { return (0); } static moduledata_t mlx4_mod = { .name = "mlx4", .evhand = mlx4_evhand, }; MODULE_VERSION(mlx4, 1); DECLARE_MODULE(mlx4, mlx4_mod, SI_SUB_OFED_PREINIT, SI_ORDER_ANY); Index: stable/9/sys/ofed/drivers/net/mlx4/mcg.c =================================================================== --- 
stable/9/sys/ofed/drivers/net/mlx4/mcg.c (revision 279733) +++ stable/9/sys/ofed/drivers/net/mlx4/mcg.c (revision 279734) @@ -1,1538 +1,1543 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include #include #include #include +#include #include "mlx4.h" int mlx4_get_mgm_entry_size(struct mlx4_dev *dev) { return 1 << dev->oper_log_mgm_entry_size; } int mlx4_get_qp_per_mgm(struct mlx4_dev *dev) { return 4 * (mlx4_get_mgm_entry_size(dev) / 16 - 2); } static int mlx4_QP_FLOW_STEERING_ATTACH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, u32 size, u64 *reg_id) { u64 imm; int err = 0; err = mlx4_cmd_imm(dev, mailbox->dma, &imm, size, 0, MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; *reg_id = imm; return err; } static int mlx4_QP_FLOW_STEERING_DETACH(struct mlx4_dev *dev, u64 regid) { int err = 0; err = mlx4_cmd(dev, regid, 0, 0, MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); return err; } static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index, struct mlx4_cmd_mailbox *mailbox) { return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } static int mlx4_WRITE_ENTRY(struct mlx4_dev *dev, int index, struct mlx4_cmd_mailbox *mailbox) { return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 port, u8 steer, struct mlx4_cmd_mailbox *mailbox) { u32 in_mod; in_mod = (u32) port << 16 | steer << 1; return mlx4_cmd(dev, mailbox->dma, in_mod, 0x1, MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, u16 *hash, u8 op_mod) { u64 imm; int err; err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, op_mod, MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (!err) *hash = imm; return err; } static struct mlx4_promisc_qp *get_promisc_qp(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_promisc_qp *pqp; if (port < 1 || port > dev->caps.num_ports) return NULL; s_steer = 
&mlx4_priv(dev)->steer[port - 1]; list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) { if (pqp->qpn == qpn) return pqp; } /* not found */ return NULL; } /* * Add new entry to steering data structure. * All promisc QPs should be added as well */ static int new_steering_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 members_count; struct mlx4_steer_index *new_entry; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp = NULL; u32 prot; int err; if (port < 1 || port > dev->caps.num_ports) return -EINVAL; s_steer = &mlx4_priv(dev)->steer[port - 1]; new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL); if (!new_entry) return -ENOMEM; INIT_LIST_HEAD(&new_entry->duplicates); new_entry->index = index; list_add_tail(&new_entry->list, &s_steer->steer_entries[steer]); /* If the given qpn is also a promisc qp, * it should be inserted to duplicates list */ pqp = get_promisc_qp(dev, port, steer, qpn); if (pqp) { dqp = kmalloc(sizeof *dqp, GFP_KERNEL); if (!dqp) { err = -ENOMEM; goto out_alloc; } dqp->qpn = qpn; list_add_tail(&dqp->list, &new_entry->duplicates); } /* if no promisc qps for this vep, we are done */ if (list_empty(&s_steer->promisc_qps[steer])) return 0; /* now need to add all the promisc qps to the new * steering entry, as they should also receive the packets * destined to this address */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = -ENOMEM; goto out_alloc; } mgm = mailbox->buf; err = mlx4_READ_ENTRY(dev, index, mailbox); if (err) goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; prot = be32_to_cpu(mgm->members_count) >> 30; list_for_each_entry(pqp, &s_steer->promisc_qps[steer], list) { /* don't add already existing qpn */ if (pqp->qpn == qpn) continue; if (members_count == dev->caps.num_qp_per_mgm) { /* out of space */ err = -ENOMEM; goto out_mailbox; } /* add the qpn 
*/ mgm->qp[members_count++] = cpu_to_be32(pqp->qpn & MGM_QPN_MASK); } /* update the qps count and update the entry with all the promisc qps*/ mgm->members_count = cpu_to_be32(members_count | (prot << 30)); err = mlx4_WRITE_ENTRY(dev, index, mailbox); out_mailbox: mlx4_free_cmd_mailbox(dev, mailbox); if (!err) return 0; out_alloc: if (dqp) { list_del(&dqp->list); kfree(dqp); } list_del(&new_entry->list); kfree(new_entry); return err; } /* update the data structures with existing steering entry */ static int existing_steering_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_steer_index *tmp_entry, *entry = NULL; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; if (port < 1 || port > dev->caps.num_ports) return -EINVAL; s_steer = &mlx4_priv(dev)->steer[port - 1]; pqp = get_promisc_qp(dev, port, steer, qpn); if (!pqp) return 0; /* nothing to do */ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) { if (tmp_entry->index == index) { entry = tmp_entry; break; } } if (unlikely(!entry)) { mlx4_warn(dev, "Steering entry at index %x is not registered\n", index); return -EINVAL; } /* the given qpn is listed as a promisc qpn * we need to add it as a duplicate to this entry * for future references */ list_for_each_entry(dqp, &entry->duplicates, list) { if (qpn == dqp->qpn) return 0; /* qp is already duplicated */ } /* add the qp as a duplicate on this index */ dqp = kmalloc(sizeof *dqp, GFP_KERNEL); if (!dqp) return -ENOMEM; dqp->qpn = qpn; list_add_tail(&dqp->list, &entry->duplicates); return 0; } /* Check whether a qpn is a duplicate on steering entry * If so, it should not be removed from mgm */ static bool check_duplicate_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_steer_index *tmp_entry, *entry = NULL; struct mlx4_promisc_qp *dqp, *tmp_dqp; if (port < 1 || port > 
dev->caps.num_ports) return NULL; s_steer = &mlx4_priv(dev)->steer[port - 1]; /* if qp is not promisc, it cannot be duplicated */ if (!get_promisc_qp(dev, port, steer, qpn)) return false; /* The qp is promisc qp so it is a duplicate on this index * Find the index entry, and remove the duplicate */ list_for_each_entry(tmp_entry, &s_steer->steer_entries[steer], list) { if (tmp_entry->index == index) { entry = tmp_entry; break; } } if (unlikely(!entry)) { mlx4_warn(dev, "Steering entry for index %x is not registered\n", index); return false; } list_for_each_entry_safe(dqp, tmp_dqp, &entry->duplicates, list) { if (dqp->qpn == qpn) { list_del(&dqp->list); kfree(dqp); } } return true; } /* * returns true if all the QPs != tqpn contained in this entry * are Promisc QPs. return false otherwise. */ static bool promisc_steering_entry(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, unsigned int index, u32 tqpn, u32 *members_count) { struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 m_count; bool ret = false; int i; if (port < 1 || port > dev->caps.num_ports) return false; s_steer = &mlx4_priv(dev)->steer[port - 1]; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return false; mgm = mailbox->buf; if (mlx4_READ_ENTRY(dev, index, mailbox)) goto out; m_count = be32_to_cpu(mgm->members_count) & 0xffffff; if (members_count) *members_count = m_count; for (i = 0; i < m_count; i++) { u32 qpn = be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK; if (!get_promisc_qp(dev, port, steer, qpn) && qpn != tqpn) { /* the qp is not promisc, the entry can't be removed */ goto out; } } ret = true; out: mlx4_free_cmd_mailbox(dev, mailbox); return ret; } /* IF a steering entry contains only promisc QPs, it can be removed. 
*/
/*
 * Decide whether the steering entry at 'index' may be dropped once QP
 * 'tqpn' is detached, and if so release its software bookkeeping.
 *
 * Returns false when the port is out of range, when
 * promisc_steering_entry() reports false for this index, or when the
 * matching entry still carries duplicates and holds more than one member.
 * Otherwise the matching entry (and everything on its duplicates list) is
 * unlinked and freed and true is returned.
 */
static bool can_remove_steering_entry(struct mlx4_dev *dev, u8 port,
				      enum mlx4_steer_type steer,
				      unsigned int index, u32 tqpn)
{
	struct mlx4_steer *s_steer;
	struct mlx4_steer_index *entry = NULL, *tmp_entry;
	u32 members_count;
	bool ret = false;

	/*
	 * This function returns bool; the previous "return NULL" relied on
	 * a pointer-to-bool conversion.  Use the proper constant.
	 */
	if (port < 1 || port > dev->caps.num_ports)
		return false;

	s_steer = &mlx4_priv(dev)->steer[port - 1];

	if (!promisc_steering_entry(dev, port, steer, index, tqpn, &members_count))
		goto out;

	/* All the qps currently registered for this entry are promiscuous,
	 * Checking for duplicates */
	ret = true;
	list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) {
		if (entry->index == index) {
			if (list_empty(&entry->duplicates) || members_count == 1) {
				struct mlx4_promisc_qp *pqp, *tmp_pqp;
				/*
				 * If there is only 1 entry in duplicates then
				 * this is the QP we want to delete, going over
				 * the list and deleting the entry.
				 */
				list_del(&entry->list);
				list_for_each_entry_safe(pqp, tmp_pqp,
							 &entry->duplicates,
							 list) {
					list_del(&pqp->list);
					kfree(pqp);
				}
				kfree(entry);
			} else {
				/* This entry contains duplicates so it shouldn't be removed */
				ret = false;
				goto out;
			}
		}
	}

out:
	return ret;
}

static int add_promisc_qp(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, u32 qpn) { struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; struct mlx4_steer_index *entry; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; u32 members_count; u32 prot; int i; bool found; int err; struct mlx4_priv *priv = mlx4_priv(dev); if (port < 1 || port > dev->caps.num_ports) return -EINVAL; s_steer = &mlx4_priv(dev)->steer[port - 1]; mutex_lock(&priv->mcg_table.mutex); if (get_promisc_qp(dev, port, steer, qpn)) { err = 0; /* Noting to do, already exists */ goto out_mutex; } pqp = kmalloc(sizeof *pqp, GFP_KERNEL); if (!pqp) { err = -ENOMEM; goto out_mutex; } pqp->qpn = qpn; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = -ENOMEM; goto out_alloc; } mgm = mailbox->buf; if 
(!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) { /* the promisc qp needs to be added for each one of the steering * entries, if it already exists, needs to be added as a duplicate * for this entry */ list_for_each_entry(entry, &s_steer->steer_entries[steer], list) { err = mlx4_READ_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; prot = be32_to_cpu(mgm->members_count) >> 30; found = false; for (i = 0; i < members_count; i++) { if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) { /* Entry already exists, add to duplicates */ dqp = kmalloc(sizeof *dqp, GFP_KERNEL); if (!dqp) { err = -ENOMEM; goto out_mailbox; } dqp->qpn = qpn; list_add_tail(&dqp->list, &entry->duplicates); found = true; } } if (!found) { /* Need to add the qpn to mgm */ if (members_count == dev->caps.num_qp_per_mgm) { /* entry is full */ err = -ENOMEM; goto out_mailbox; } mgm->qp[members_count++] = cpu_to_be32(qpn & MGM_QPN_MASK); mgm->members_count = cpu_to_be32(members_count | (prot << 30)); err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; } } } /* add the new qpn to list of promisc qps */ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]); /* now need to add all the promisc qps to default entry */ memset(mgm, 0, sizeof *mgm); members_count = 0; list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) { if (members_count == dev->caps.num_qp_per_mgm) { /* entry is full */ err = -ENOMEM; goto out_list; } mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK); } mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30); err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox); if (err) goto out_list; mlx4_free_cmd_mailbox(dev, mailbox); mutex_unlock(&priv->mcg_table.mutex); return 0; out_list: list_del(&pqp->list); out_mailbox: mlx4_free_cmd_mailbox(dev, mailbox); out_alloc: kfree(pqp); out_mutex: mutex_unlock(&priv->mcg_table.mutex); return err; } static int 
remove_promisc_qp(struct mlx4_dev *dev, u8 port, enum mlx4_steer_type steer, u32 qpn) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_steer *s_steer; struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; struct mlx4_steer_index *entry, *tmp_entry; struct mlx4_promisc_qp *pqp; struct mlx4_promisc_qp *dqp; u32 members_count; bool found; bool back_to_list = false; int i, loc = -1; int err; if (port < 1 || port > dev->caps.num_ports) return -EINVAL; s_steer = &mlx4_priv(dev)->steer[port - 1]; mutex_lock(&priv->mcg_table.mutex); pqp = get_promisc_qp(dev, port, steer, qpn); if (unlikely(!pqp)) { mlx4_warn(dev, "QP %x is not promiscuous QP\n", qpn); /* nothing to do */ err = 0; goto out_mutex; } /*remove from list of promisc qps */ list_del(&pqp->list); /* set the default entry not to include the removed one */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) { err = -ENOMEM; back_to_list = true; goto out_list; } mgm = mailbox->buf; memset(mgm, 0, sizeof *mgm); members_count = 0; list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) mgm->qp[members_count++] = cpu_to_be32(dqp->qpn & MGM_QPN_MASK); mgm->members_count = cpu_to_be32(members_count | MLX4_PROT_ETH << 30); err = mlx4_WRITE_PROMISC(dev, port, steer, mailbox); if (err) goto out_mailbox; if (!(mlx4_is_mfunc(dev) && steer == MLX4_UC_STEER)) { /* remove the qp from all the steering entries*/ list_for_each_entry_safe(entry, tmp_entry, &s_steer->steer_entries[steer], list) { found = false; list_for_each_entry(dqp, &entry->duplicates, list) { if (dqp->qpn == qpn) { found = true; break; } } if (found) { /* a duplicate, no need to change the mgm, * only update the duplicates list */ list_del(&dqp->list); kfree(dqp); } else { err = mlx4_READ_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; if (!members_count) { mlx4_warn(dev, "QP %06x wasn't found in entry %x mcount=0." 
" deleting entry...\n", qpn, entry->index); list_del(&entry->list); kfree(entry); continue; } for (i = 0; i < members_count; ++i) if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qpn) { loc = i; break; } if (loc < 0) { mlx4_err(dev, "QP %06x wasn't found in entry %d\n", qpn, entry->index); err = -EINVAL; goto out_mailbox; } /* copy the last QP in this MGM over removed QP */ mgm->qp[loc] = mgm->qp[members_count - 1]; mgm->qp[members_count - 1] = 0; mgm->members_count = cpu_to_be32(--members_count | (MLX4_PROT_ETH << 30)); err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); if (err) goto out_mailbox; } } } out_mailbox: mlx4_free_cmd_mailbox(dev, mailbox); out_list: if (back_to_list) list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]); else kfree(pqp); out_mutex: mutex_unlock(&priv->mcg_table.mutex); return err; } /* * Caller must hold MCG table semaphore. gid and mgm parameters must * be properly aligned for command interface. * * Returns 0 unless a firmware command error occurs. * * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1 * and *mgm holds MGM entry. * * if GID is found in AMGM, *index = index in AMGM, *prev = index of * previous entry in hash chain and *mgm holds AMGM entry. * * If no AMGM exists for given gid, *index = -1, *prev = index of last * entry in hash chain and *mgm holds end of hash chain. */ static int find_entry(struct mlx4_dev *dev, u8 port, u8 *gid, enum mlx4_protocol prot, struct mlx4_cmd_mailbox *mgm_mailbox, int *prev, int *index) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm = mgm_mailbox->buf; u8 *mgid; int err; u16 hash; u8 op_mod = (prot == MLX4_PROT_ETH) ? 
!!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) : 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return -ENOMEM; mgid = mailbox->buf; memcpy(mgid, gid, 16); err = mlx4_GID_HASH(dev, mailbox, &hash, op_mod); mlx4_free_cmd_mailbox(dev, mailbox); if (err) return err; - if (0) - mlx4_dbg(dev, "Hash for %pI6 is %04x\n", gid, hash); + if (0) { + mlx4_dbg(dev, "Hash for "GID_PRINT_FMT" is %04x\n", + GID_PRINT_ARGS(gid), hash); + } *index = hash; *prev = -1; do { err = mlx4_READ_ENTRY(dev, *index, mgm_mailbox); if (err) return err; if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) { if (*index != hash) { mlx4_err(dev, "Found zero MGID in AMGM.\n"); err = -EINVAL; } return err; } if (!memcmp(mgm->gid, gid, 16) && be32_to_cpu(mgm->members_count) >> 30 == prot) return err; *prev = *index; *index = be32_to_cpu(mgm->next_gid_index) >> 6; } while (*index); *index = -1; return err; } static const u8 __promisc_mode[] = { [MLX4_FS_REGULAR] = 0x0, [MLX4_FS_ALL_DEFAULT] = 0x1, [MLX4_FS_MC_DEFAULT] = 0x3, [MLX4_FS_UC_SNIFFER] = 0x4, [MLX4_FS_MC_SNIFFER] = 0x5, }; int map_sw_to_hw_steering_mode(struct mlx4_dev *dev, enum mlx4_net_trans_promisc_mode flow_type) { if (flow_type >= MLX4_FS_MODE_NUM || flow_type < 0) { mlx4_err(dev, "Invalid flow type. type = %d\n", flow_type); return -EINVAL; } return __promisc_mode[flow_type]; } EXPORT_SYMBOL_GPL(map_sw_to_hw_steering_mode); static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl, struct mlx4_net_trans_rule_hw_ctrl *hw) { u8 flags = 0; flags = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0; flags |= ctrl->exclusive ? (1 << 2) : 0; flags |= ctrl->allow_loopback ? 
(1 << 3) : 0; hw->flags = flags; hw->type = __promisc_mode[ctrl->promisc_mode]; hw->prio = cpu_to_be16(ctrl->priority); hw->port = ctrl->port; hw->qpn = cpu_to_be32(ctrl->qpn); } const u16 __sw_id_hw[] = { [MLX4_NET_TRANS_RULE_ID_ETH] = 0xE001, [MLX4_NET_TRANS_RULE_ID_IB] = 0xE005, [MLX4_NET_TRANS_RULE_ID_IPV6] = 0xE003, [MLX4_NET_TRANS_RULE_ID_IPV4] = 0xE002, [MLX4_NET_TRANS_RULE_ID_TCP] = 0xE004, [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006 }; int map_sw_to_hw_steering_id(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id) { if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) { mlx4_err(dev, "Invalid network rule id. id = %d\n", id); return -EINVAL; } return __sw_id_hw[id]; } EXPORT_SYMBOL_GPL(map_sw_to_hw_steering_id); static const int __rule_hw_sz[] = { [MLX4_NET_TRANS_RULE_ID_ETH] = sizeof(struct mlx4_net_trans_rule_hw_eth), [MLX4_NET_TRANS_RULE_ID_IB] = sizeof(struct mlx4_net_trans_rule_hw_ib), [MLX4_NET_TRANS_RULE_ID_IPV6] = 0, [MLX4_NET_TRANS_RULE_ID_IPV4] = sizeof(struct mlx4_net_trans_rule_hw_ipv4), [MLX4_NET_TRANS_RULE_ID_TCP] = sizeof(struct mlx4_net_trans_rule_hw_tcp_udp), [MLX4_NET_TRANS_RULE_ID_UDP] = sizeof(struct mlx4_net_trans_rule_hw_tcp_udp) }; int hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id) { if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) { mlx4_err(dev, "Invalid network rule id. 
id = %d\n", id); return -EINVAL; } return __rule_hw_sz[id]; } EXPORT_SYMBOL_GPL(hw_rule_sz); static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec, struct _rule_hw *rule_hw) { if (hw_rule_sz(dev, spec->id) < 0) return -EINVAL; memset(rule_hw, 0, hw_rule_sz(dev, spec->id)); rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]); rule_hw->size = hw_rule_sz(dev, spec->id) >> 2; switch (spec->id) { case MLX4_NET_TRANS_RULE_ID_ETH: memcpy(rule_hw->eth.dst_mac, spec->eth.dst_mac, ETH_ALEN); memcpy(rule_hw->eth.dst_mac_msk, spec->eth.dst_mac_msk, ETH_ALEN); memcpy(rule_hw->eth.src_mac, spec->eth.src_mac, ETH_ALEN); memcpy(rule_hw->eth.src_mac_msk, spec->eth.src_mac_msk, ETH_ALEN); if (spec->eth.ether_type_enable) { rule_hw->eth.ether_type_enable = 1; rule_hw->eth.ether_type = spec->eth.ether_type; } rule_hw->eth.vlan_tag = spec->eth.vlan_id; rule_hw->eth.vlan_tag_msk = spec->eth.vlan_id_msk; break; case MLX4_NET_TRANS_RULE_ID_IB: rule_hw->ib.l3_qpn = spec->ib.l3_qpn; rule_hw->ib.qpn_mask = spec->ib.qpn_msk; memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16); memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16); break; case MLX4_NET_TRANS_RULE_ID_IPV6: return -EOPNOTSUPP; case MLX4_NET_TRANS_RULE_ID_IPV4: rule_hw->ipv4.src_ip = spec->ipv4.src_ip; rule_hw->ipv4.src_ip_msk = spec->ipv4.src_ip_msk; rule_hw->ipv4.dst_ip = spec->ipv4.dst_ip; rule_hw->ipv4.dst_ip_msk = spec->ipv4.dst_ip_msk; break; case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: rule_hw->tcp_udp.dst_port = spec->tcp_udp.dst_port; rule_hw->tcp_udp.dst_port_msk = spec->tcp_udp.dst_port_msk; rule_hw->tcp_udp.src_port = spec->tcp_udp.src_port; rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk; break; default: return -EINVAL; } return __rule_hw_sz[spec->id]; } static void mlx4_err_rule(struct mlx4_dev *dev, char *str, struct mlx4_net_trans_rule *rule) { #define BUF_SIZE 256 struct mlx4_spec_list *cur; char buf[BUF_SIZE]; int len = 0; mlx4_err(dev, "%s", str); len += 
snprintf(buf + len, BUF_SIZE - len, "port = %d prio = 0x%x qp = 0x%x ", rule->port, rule->priority, rule->qpn); list_for_each_entry(cur, &rule->list, list) { switch (cur->id) { case MLX4_NET_TRANS_RULE_ID_ETH: len += snprintf(buf + len, BUF_SIZE - len, "dmac = %pM ", &cur->eth.dst_mac); if (cur->eth.ether_type) len += snprintf(buf + len, BUF_SIZE - len, "ethertype = 0x%x ", be16_to_cpu(cur->eth.ether_type)); if (cur->eth.vlan_id) len += snprintf(buf + len, BUF_SIZE - len, "vlan-id = %d ", be16_to_cpu(cur->eth.vlan_id)); break; case MLX4_NET_TRANS_RULE_ID_IPV4: if (cur->ipv4.src_ip) len += snprintf(buf + len, BUF_SIZE - len, "src-ip = %pI4 ", &cur->ipv4.src_ip); if (cur->ipv4.dst_ip) len += snprintf(buf + len, BUF_SIZE - len, "dst-ip = %pI4 ", &cur->ipv4.dst_ip); break; case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: if (cur->tcp_udp.src_port) len += snprintf(buf + len, BUF_SIZE - len, "src-port = %d ", be16_to_cpu(cur->tcp_udp.src_port)); if (cur->tcp_udp.dst_port) len += snprintf(buf + len, BUF_SIZE - len, "dst-port = %d ", be16_to_cpu(cur->tcp_udp.dst_port)); break; case MLX4_NET_TRANS_RULE_ID_IB: len += snprintf(buf + len, BUF_SIZE - len, - "dst-gid = %pI6\n", cur->ib.dst_gid); + "dst-gid = "GID_PRINT_FMT"\n", + GID_PRINT_ARGS(cur->ib.dst_gid)); len += snprintf(buf + len, BUF_SIZE - len, - "dst-gid-mask = %pI6\n", - cur->ib.dst_gid_msk); + "dst-gid-mask = "GID_PRINT_FMT"\n", + GID_PRINT_ARGS(cur->ib.dst_gid_msk)); break; case MLX4_NET_TRANS_RULE_ID_IPV6: break; default: break; } } len += snprintf(buf + len, BUF_SIZE - len, "\n"); mlx4_err(dev, "%s", buf); if (len >= BUF_SIZE) mlx4_err(dev, "Network rule error message was truncated, print buffer is too small.\n"); } int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id) { struct mlx4_cmd_mailbox *mailbox; struct mlx4_spec_list *cur; u32 size = 0; int ret; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memset(mailbox->buf, 
0, sizeof(struct mlx4_net_trans_rule_hw_ctrl)); trans_rule_ctrl_to_hw(rule, mailbox->buf); size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); list_for_each_entry(cur, &rule->list, list) { ret = parse_trans_rule(dev, cur, mailbox->buf + size); if (ret < 0) { mlx4_free_cmd_mailbox(dev, mailbox); return -EINVAL; } size += ret; } ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id); if (ret == -ENOMEM) mlx4_err_rule(dev, "mcg table is full. Fail to register network rule.\n", rule); else if (ret) mlx4_err_rule(dev, "Fail to register network rule.\n", rule); mlx4_free_cmd_mailbox(dev, mailbox); return ret; } EXPORT_SYMBOL_GPL(mlx4_flow_attach); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id) { int err; err = mlx4_QP_FLOW_STEERING_DETACH(dev, reg_id); if (err) mlx4_err(dev, "Fail to detach network rule. registration id = 0x%llx\n", (unsigned long long)reg_id); return err; } EXPORT_SYMBOL_GPL(mlx4_flow_detach); int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn) { int err; u64 in_param; in_param = ((u64) min_range_qpn) << 32; in_param |= ((u64) max_range_qpn) & 0xFFFFFFFF; err = mlx4_cmd(dev, in_param, 0, 0, MLX4_FLOW_STEERING_IB_UC_QP_RANGE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); return err; } EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE); int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot, enum mlx4_steer_type steer) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 members_count; int index, prev; int link = 0; int i; int err; u8 port = gid[5]; u8 new_entry = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); mgm = mailbox->buf; mutex_lock(&priv->mcg_table.mutex); err = find_entry(dev, port, gid, prot, mailbox, &prev, &index); if (err) goto out; if (index != -1) { if (!(be32_to_cpu(mgm->members_count) & 0xffffff)) { new_entry = 
1; memcpy(mgm->gid, gid, 16); } } else { link = 1; index = mlx4_bitmap_alloc(&priv->mcg_table.bitmap); if (index == -1) { mlx4_err(dev, "No AMGM entries left\n"); err = -ENOMEM; goto out; } index += dev->caps.num_mgms; new_entry = 1; memset(mgm, 0, sizeof *mgm); memcpy(mgm->gid, gid, 16); } members_count = be32_to_cpu(mgm->members_count) & 0xffffff; if (members_count == dev->caps.num_qp_per_mgm) { mlx4_err(dev, "MGM at index %x is full.\n", index); err = -ENOMEM; goto out; } for (i = 0; i < members_count; ++i) if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) { mlx4_dbg(dev, "QP %06x already a member of MGM\n", qp->qpn); err = 0; goto out; } mgm->qp[members_count++] = cpu_to_be32((qp->qpn & MGM_QPN_MASK) | (!!mlx4_blck_lb << MGM_BLCK_LB_BIT)); mgm->members_count = cpu_to_be32(members_count | (u32) prot << 30); err = mlx4_WRITE_ENTRY(dev, index, mailbox); if (err) goto out; /* if !link, still add the new entry. */ if (!link) goto skip_link; err = mlx4_READ_ENTRY(dev, prev, mailbox); if (err) goto out; mgm->next_gid_index = cpu_to_be32(index << 6); err = mlx4_WRITE_ENTRY(dev, prev, mailbox); if (err) goto out; skip_link: if (prot == MLX4_PROT_ETH) { /* manage the steering entry for promisc mode */ if (new_entry) new_steering_entry(dev, port, steer, index, qp->qpn); else existing_steering_entry(dev, port, steer, index, qp->qpn); } out: if (err && link && index != -1) { if (index < dev->caps.num_mgms) mlx4_warn(dev, "Got AMGM index %d < %d", index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, index - dev->caps.num_mgms, MLX4_USE_RR); } mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot, enum mlx4_steer_type steer) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 members_count; int prev, index; int i, loc = -1; int err; u8 port = gid[5]; bool 
removed_entry = false; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); mgm = mailbox->buf; mutex_lock(&priv->mcg_table.mutex); err = find_entry(dev, port, gid, prot, mailbox, &prev, &index); if (err) goto out; if (index == -1) { - mlx4_err(dev, "MGID %pI6 not found\n", gid); + mlx4_err(dev, "MGID "GID_PRINT_FMT" not found\n", + GID_PRINT_ARGS(gid)); err = -EINVAL; goto out; } /* if this QP is also a promisc QP, it shouldn't be removed only if at least one none promisc QP is also attached to this MCG */ if (prot == MLX4_PROT_ETH && check_duplicate_entry(dev, port, steer, index, qp->qpn) && !promisc_steering_entry(dev, port, steer, index, qp->qpn, NULL)) goto out; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; for (i = 0; i < members_count; ++i) if ((be32_to_cpu(mgm->qp[i]) & MGM_QPN_MASK) == qp->qpn) { loc = i; break; } if (loc == -1) { mlx4_err(dev, "QP %06x not found in MGM\n", qp->qpn); err = -EINVAL; goto out; } /* copy the last QP in this MGM over removed QP */ mgm->qp[loc] = mgm->qp[members_count - 1]; mgm->qp[members_count - 1] = 0; mgm->members_count = cpu_to_be32(--members_count | (u32) prot << 30); if (prot == MLX4_PROT_ETH) removed_entry = can_remove_steering_entry(dev, port, steer, index, qp->qpn); if (members_count && (prot != MLX4_PROT_ETH || !removed_entry)) { err = mlx4_WRITE_ENTRY(dev, index, mailbox); goto out; } /* We are going to delete the entry, members count should be 0 */ mgm->members_count = cpu_to_be32((u32) prot << 30); if (prev == -1) { /* Remove entry from MGM */ int amgm_index = be32_to_cpu(mgm->next_gid_index) >> 6; if (amgm_index) { err = mlx4_READ_ENTRY(dev, amgm_index, mailbox); if (err) goto out; } else memset(mgm->gid, 0, 16); err = mlx4_WRITE_ENTRY(dev, index, mailbox); if (err) goto out; if (amgm_index) { if (amgm_index < dev->caps.num_mgms) mlx4_warn(dev, "MGM entry %d had AMGM index %d < %d", index, amgm_index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, 
amgm_index - dev->caps.num_mgms, MLX4_USE_RR); } } else { /* Remove entry from AMGM */ int cur_next_index = be32_to_cpu(mgm->next_gid_index) >> 6; err = mlx4_READ_ENTRY(dev, prev, mailbox); if (err) goto out; mgm->next_gid_index = cpu_to_be32(cur_next_index << 6); err = mlx4_WRITE_ENTRY(dev, prev, mailbox); if (err) goto out; if (index < dev->caps.num_mgms) mlx4_warn(dev, "entry %d had next AMGM index %d < %d", prev, index, dev->caps.num_mgms); else mlx4_bitmap_free(&priv->mcg_table.bitmap, index - dev->caps.num_mgms, MLX4_USE_RR); } out: mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); return err; } static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], u8 attach, u8 block_loopback, enum mlx4_protocol prot) { struct mlx4_cmd_mailbox *mailbox; int err = 0; int qpn; if (!mlx4_is_mfunc(dev)) return -EBADF; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); memcpy(mailbox->buf, gid, 16); qpn = qp->qpn; qpn |= (prot << 28); if (attach && block_loopback) qpn |= (1 << 31); err = mlx4_cmd(dev, mailbox->dma, qpn, attach, MLX4_CMD_QP_ATTACH, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev, mailbox); return err; } int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], u8 port, int block_mcast_loopback, enum mlx4_protocol prot, u64 *reg_id) { struct mlx4_spec_list spec = { {NULL} }; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .promisc_mode = MLX4_FS_REGULAR, .priority = MLX4_DOMAIN_NIC, }; rule.allow_loopback = !block_mcast_loopback; rule.port = port; rule.qpn = qp->qpn; INIT_LIST_HEAD(&rule.list); switch (prot) { case MLX4_PROT_ETH: spec.id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec.eth.dst_mac, &gid[10], ETH_ALEN); memcpy(spec.eth.dst_mac_msk, &mac_mask, ETH_ALEN); break; case MLX4_PROT_IB_IPV6: spec.id = MLX4_NET_TRANS_RULE_ID_IB; 
memcpy(spec.ib.dst_gid, gid, 16); memset(&spec.ib.dst_gid_msk, 0xff, 16); break; default: return -EINVAL; } list_add_tail(&spec.list, &rule.list); return mlx4_flow_attach(dev, &rule, reg_id); } int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], u8 port, int block_mcast_loopback, enum mlx4_protocol prot, u64 *reg_id) { enum mlx4_steer_type steer; steer = (is_valid_ether_addr(&gid[10])) ? MLX4_UC_STEER : MLX4_MC_STEER; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_A0: if (prot == MLX4_PROT_ETH) return 0; case MLX4_STEERING_MODE_B0: if (prot == MLX4_PROT_ETH) gid[7] |= (steer << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 1, block_mcast_loopback, prot); return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback, prot, MLX4_MC_STEER); case MLX4_STEERING_MODE_DEVICE_MANAGED: return mlx4_trans_to_dmfs_attach(dev, qp, gid, port, block_mcast_loopback, prot, reg_id); default: return -EINVAL; } } EXPORT_SYMBOL_GPL(mlx4_multicast_attach); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot, u64 reg_id) { enum mlx4_steer_type steer; steer = (is_valid_ether_addr(&gid[10])) ? 
MLX4_UC_STEER : MLX4_MC_STEER; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_A0: if (prot == MLX4_PROT_ETH) return 0; case MLX4_STEERING_MODE_B0: if (prot == MLX4_PROT_ETH) gid[7] |= (steer << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_MC_STEER); case MLX4_STEERING_MODE_DEVICE_MANAGED: return mlx4_flow_detach(dev, reg_id); default: return -EINVAL; } } EXPORT_SYMBOL_GPL(mlx4_multicast_detach); int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, enum mlx4_net_trans_promisc_mode mode) { struct mlx4_net_trans_rule rule; u64 *regid_p; switch (mode) { case MLX4_FS_ALL_DEFAULT: regid_p = &dev->regid_promisc_array[port]; break; case MLX4_FS_MC_DEFAULT: regid_p = &dev->regid_allmulti_array[port]; break; default: return -1; } if (*regid_p != 0) return -1; rule.promisc_mode = mode; rule.port = port; rule.qpn = qpn; INIT_LIST_HEAD(&rule.list); mlx4_err(dev, "going promisc on %x\n", port); return mlx4_flow_attach(dev, &rule, regid_p); } EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_add); int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, enum mlx4_net_trans_promisc_mode mode) { int ret; u64 *regid_p; switch (mode) { case MLX4_FS_ALL_DEFAULT: regid_p = &dev->regid_promisc_array[port]; break; case MLX4_FS_MC_DEFAULT: regid_p = &dev->regid_allmulti_array[port]; break; default: return -1; } if (*regid_p == 0) return -1; ret = mlx4_flow_detach(dev, *regid_p); if (ret == 0) *regid_p = 0; return ret; } EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_remove); int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot) { if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_UC_STEER << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 1, block_mcast_loopback, prot); return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback, prot, MLX4_UC_STEER); } EXPORT_SYMBOL_GPL(mlx4_unicast_attach); 
int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot) { if (prot == MLX4_PROT_ETH) gid[7] |= (MLX4_UC_STEER << 1); if (mlx4_is_mfunc(dev)) return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot); return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_UC_STEER); } EXPORT_SYMBOL_GPL(mlx4_unicast_detach); int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { u32 qpn = (u32) vhcr->in_param & 0xffffffff; u8 port = vhcr->in_param >> 62; enum mlx4_steer_type steer = vhcr->in_modifier; /* Promiscuous unicast is not allowed in mfunc for VFs */ if ((slave != dev->caps.function) && (steer == MLX4_UC_STEER)) return 0; if (vhcr->op_modifier) return add_promisc_qp(dev, port, steer, qpn); else return remove_promisc_qp(dev, port, steer, qpn); } static int mlx4_PROMISC(struct mlx4_dev *dev, u32 qpn, enum mlx4_steer_type steer, u8 add, u8 port) { return mlx4_cmd(dev, (u64) qpn | (u64) port << 62, (u32) steer, add, MLX4_CMD_PROMISC, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) { if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 1, port); return add_promisc_qp(dev, port, MLX4_MC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_add); int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) { if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 0, port); return remove_promisc_qp(dev, port, MLX4_MC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_remove); int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port) { if (mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 1, port); return add_promisc_qp(dev, port, MLX4_UC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_add); int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port) { if 
(mlx4_is_mfunc(dev)) return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 0, port); return remove_promisc_qp(dev, port, MLX4_UC_STEER, qpn); } EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_remove); int mlx4_init_mcg_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); int err; /* No need for mcg_table when fw managed the mcg table*/ if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) return 0; err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms, dev->caps.num_amgms - 1, 0, 0); if (err) return err; mutex_init(&priv->mcg_table.mutex); return 0; } void mlx4_cleanup_mcg_table(struct mlx4_dev *dev) { if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap); } Index: stable/9/sys/ofed/drivers/net/mlx4/mlx4_en.h =================================================================== --- stable/9/sys/ofed/drivers/net/mlx4/mlx4_en.h (revision 279733) +++ stable/9/sys/ofed/drivers/net/mlx4/mlx4_en.h (revision 279734) @@ -1,950 +1,945 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #ifndef _MLX4_EN_H_ #define _MLX4_EN_H_ #include #include #include #include #include #include #include #include #ifdef CONFIG_MLX4_EN_DCB #include #endif #include #include #include #include #include #include #include #include "en_port.h" #include "mlx4_stats.h" #define DRV_NAME "mlx4_en" #define DRV_VERSION "2.1" #define DRV_RELDATE __DATE__ #define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN) /* * Device constants */ #define MLX4_EN_PAGE_SHIFT 12 #define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT) #define DEF_RX_RINGS 16 #define MAX_RX_RINGS 128 #define MIN_RX_RINGS 4 #define TXBB_SIZE 64 #define HEADROOM (2048 / TXBB_SIZE + 1) #define STAMP_STRIDE 64 #define STAMP_DWORDS (STAMP_STRIDE / 4) #define STAMP_SHIFT 31 #define STAMP_VAL 0x7fffffff #define STATS_DELAY (HZ / 4) #define SERVICE_TASK_DELAY (HZ / 4) #define MAX_NUM_OF_FS_RULES 256 #define MLX4_EN_FILTER_HASH_SHIFT 4 #define MLX4_EN_FILTER_EXPIRY_QUOTA 60 #ifdef CONFIG_NET_RX_BUSY_POLL #define LL_EXTENDED_STATS #endif /* vlan valid range */ #define VLAN_MIN_VALUE 1 #define VLAN_MAX_VALUE 4094 /* Typical TSO descriptor with 16 gather entries is 352 bytes... 
*/ #define MAX_DESC_SIZE 512 #define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE) /* * OS related constants and tunables */ #define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ) #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(PAGE_SIZE) #define MLX4_EN_ALLOC_ORDER get_order(MLX4_EN_ALLOC_SIZE) enum mlx4_en_alloc_type { MLX4_EN_ALLOC_NEW = 0, MLX4_EN_ALLOC_REPLACEMENT = 1, }; /* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU * and 4K allocations) */ #if MJUMPAGESIZE == 4096 enum { FRAG_SZ0 = MCLBYTES, FRAG_SZ1 = MJUMPAGESIZE, FRAG_SZ2 = MJUMPAGESIZE, }; #define MLX4_EN_MAX_RX_FRAGS 3 #elif MJUMPAGESIZE == 8192 enum { FRAG_SZ0 = MCLBYTES, FRAG_SZ1 = MJUMPAGESIZE, }; #define MLX4_EN_MAX_RX_FRAGS 2 #elif MJUMPAGESIZE == 8192 #else #error "Unknown PAGE_SIZE" #endif /* Maximum ring sizes */ #define MLX4_EN_DEF_TX_QUEUE_SIZE 4096 /* Minimum packet number till arming the CQ */ #define MLX4_EN_MIN_RX_ARM 2048 #define MLX4_EN_MIN_TX_ARM 2048 /* Maximum ring sizes */ #define MLX4_EN_MAX_TX_SIZE 8192 #define MLX4_EN_MAX_RX_SIZE 8192 /* Minimum ring sizes */ #define MLX4_EN_MIN_RX_SIZE (4096 / TXBB_SIZE) #define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE) #define MLX4_EN_SMALL_PKT_SIZE 64 #define MLX4_EN_MAX_TX_RING_P_UP 32 #define MLX4_EN_NUM_UP 1 #define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \ - (MLX4_EN_NUM_UP + 1)) + MLX4_EN_NUM_UP) #define MLX4_EN_DEF_TX_RING_SIZE 1024 #define MLX4_EN_DEF_RX_RING_SIZE 1024 /* Target number of bytes to coalesce with interrupt moderation */ #define MLX4_EN_RX_COAL_TARGET 0x20000 #define MLX4_EN_RX_COAL_TIME 0x10 #define MLX4_EN_TX_COAL_PKTS 64 #define MLX4_EN_TX_COAL_TIME 64 #define MLX4_EN_RX_RATE_LOW 400000 #define MLX4_EN_RX_COAL_TIME_LOW 0 #define MLX4_EN_RX_RATE_HIGH 450000 #define MLX4_EN_RX_COAL_TIME_HIGH 128 #define MLX4_EN_RX_SIZE_THRESH 1024 #define MLX4_EN_RX_RATE_THRESH (1000000 / MLX4_EN_RX_COAL_TIME_HIGH) #define MLX4_EN_SAMPLE_INTERVAL 0 #define MLX4_EN_AVG_PKT_SMALL 256 #define MLX4_EN_AUTO_CONF 0xffff #define 
MLX4_EN_DEF_RX_PAUSE 1 #define MLX4_EN_DEF_TX_PAUSE 1 /* Interval between successive polls in the Tx routine when polling is used instead of interrupts (in per-core Tx rings) - should be power of 2 */ #define MLX4_EN_TX_POLL_MODER 16 #define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4) #define MLX4_EN_64_ALIGN (64 - NET_SKB_PAD) #define SMALL_PACKET_SIZE (256 - NET_IP_ALIGN) #define HEADER_COPY_SIZE (128) #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETHER_HDR_LEN) #define MLX4_EN_MIN_MTU 46 #define ETH_BCAST 0xffffffffffffULL #define MLX4_EN_LOOPBACK_RETRIES 5 #define MLX4_EN_LOOPBACK_TIMEOUT 100 #ifdef MLX4_EN_PERF_STAT /* Number of samples to 'average' */ #define AVG_SIZE 128 #define AVG_FACTOR 1024 #define INC_PERF_COUNTER(cnt) (++(cnt)) #define ADD_PERF_COUNTER(cnt, add) ((cnt) += (add)) #define AVG_PERF_COUNTER(cnt, sample) \ ((cnt) = ((cnt) * (AVG_SIZE - 1) + (sample) * AVG_FACTOR) / AVG_SIZE) #define GET_PERF_COUNTER(cnt) (cnt) #define GET_AVG_PERF_COUNTER(cnt) ((cnt) / AVG_FACTOR) #else #define INC_PERF_COUNTER(cnt) do {} while (0) #define ADD_PERF_COUNTER(cnt, add) do {} while (0) #define AVG_PERF_COUNTER(cnt, sample) do {} while (0) #define GET_PERF_COUNTER(cnt) (0) #define GET_AVG_PERF_COUNTER(cnt) (0) #endif /* MLX4_EN_PERF_STAT */ /* * Configurables */ enum cq_type { RX = 0, TX = 1, }; /* * Useful macros */ #define ROUNDUP_LOG2(x) ilog2(roundup_pow_of_two(x)) #define XNOR(x, y) (!(x) == !(y)) #define ILLEGAL_MAC(addr) (addr == 0xffffffffffffULL || addr == 0x0) struct mlx4_en_tx_info { struct mbuf *mb; u32 nr_txbb; u32 nr_bytes; u8 linear; u8 nr_segs; u8 data_offset; u8 inl; #if 0 u8 ts_requested; #endif }; #define MLX4_EN_BIT_DESC_OWN 0x80000000 #define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg) #define MLX4_EN_MEMTYPE_PAD 0x100 #define DS_SIZE sizeof(struct mlx4_wqe_data_seg) struct mlx4_en_tx_desc { struct mlx4_wqe_ctrl_seg ctrl; union { struct mlx4_wqe_data_seg data; /* at least one data segment */ struct mlx4_wqe_lso_seg lso; struct 
mlx4_wqe_inline_seg inl; }; }; #define MLX4_EN_USE_SRQ 0x01000000 #define MLX4_EN_TX_BUDGET 64*4 //Compensate for no NAPI in freeBSD - might need some fine tunning in the future. #define MLX4_EN_RX_BUDGET 64 #define MLX4_EN_CX3_LOW_ID 0x1000 #define MLX4_EN_CX3_HIGH_ID 0x1005 struct mlx4_en_tx_ring { spinlock_t tx_lock; struct mlx4_hwq_resources wqres; u32 size ; /* number of TXBBs */ u32 size_mask; u16 stride; u16 cqn; /* index of port CQ associated with this ring */ u32 prod; u32 cons; u32 buf_size; u32 doorbell_qpn; void *buf; u16 poll_cnt; int blocked; struct mlx4_en_tx_info *tx_info; u8 *bounce_buf; u8 queue_index; cpuset_t affinity_mask; struct buf_ring *br; u32 last_nr_txbb; struct mlx4_qp qp; struct mlx4_qp_context context; int qpn; enum mlx4_qp_state qp_state; struct mlx4_srq dummy; unsigned long bytes; unsigned long packets; unsigned long tx_csum; unsigned long queue_stopped; unsigned long wake_queue; struct mlx4_bf bf; bool bf_enabled; struct netdev_queue *tx_queue; int hwtstamp_tx_type; spinlock_t comp_lock; int full_size; int inline_thold; u64 watchdog_time; }; struct mlx4_en_rx_desc { /* actual number of entries depends on rx ring stride */ struct mlx4_wqe_data_seg data[0]; }; struct mlx4_en_rx_buf { dma_addr_t dma; struct page *page; unsigned int page_offset; }; struct mlx4_en_rx_ring { struct mlx4_hwq_resources wqres; u32 size ; /* number of Rx descs*/ u32 actual_size; u32 size_mask; u16 stride; u16 log_stride; u16 cqn; /* index of port CQ associated with this ring */ u32 prod; u32 cons; u32 buf_size; u8 fcs_del; u16 rx_alloc_order; u32 rx_alloc_size; u32 rx_buf_size; u32 rx_mb_size; int qpn; void *buf; void *rx_info; unsigned long errors; unsigned long bytes; unsigned long packets; #ifdef LL_EXTENDED_STATS unsigned long yields; unsigned long misses; unsigned long cleaned; #endif unsigned long csum_ok; unsigned long csum_none; int hwtstamp_rx_filter; int numa_node; struct lro_ctrl lro; }; static inline int mlx4_en_can_lro(__be16 status) { - static 
__be16 status_all; - static __be16 status_ipv4_ipok_tcp; - static __be16 status_ipv6_ipok_tcp; - - status_all = cpu_to_be16( + const __be16 status_all = cpu_to_be16( MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV4F | MLX4_CQE_STATUS_IPV6 | MLX4_CQE_STATUS_IPV4OPT | MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP | MLX4_CQE_STATUS_IPOK); - status_ipv4_ipok_tcp = cpu_to_be16( + const __be16 status_ipv4_ipok_tcp = cpu_to_be16( MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPOK | MLX4_CQE_STATUS_TCP); - status_ipv6_ipok_tcp = cpu_to_be16( + const __be16 status_ipv6_ipok_tcp = cpu_to_be16( MLX4_CQE_STATUS_IPV6 | MLX4_CQE_STATUS_IPOK | MLX4_CQE_STATUS_TCP); status &= status_all; return (status == status_ipv4_ipok_tcp || status == status_ipv6_ipok_tcp); } - struct mlx4_en_cq { struct mlx4_cq mcq; struct mlx4_hwq_resources wqres; int ring; spinlock_t lock; struct net_device *dev; /* Per-core Tx cq processing support */ struct timer_list timer; int size; int buf_size; unsigned vector; enum cq_type is_tx; u16 moder_time; u16 moder_cnt; struct mlx4_cqe *buf; struct task cq_task; struct taskqueue *tq; #define MLX4_EN_OPCODE_ERROR 0x1e u32 tot_rx; u32 tot_tx; #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define MLX4_EN_CQ_STATEIDLE 0 #define MLX4_EN_CQ_STATENAPI 1 /* NAPI owns this CQ */ #define MLX4_EN_CQ_STATEPOLL 2 /* poll owns this CQ */ #define MLX4_CQ_LOCKED (MLX4_EN_CQ_STATENAPI | MLX4_EN_CQ_STATEPOLL) #define MLX4_EN_CQ_STATENAPI_YIELD 4 /* NAPI yielded this CQ */ #define MLX4_EN_CQ_STATEPOLL_YIELD 8 /* poll yielded this CQ */ #define CQ_YIELD (MLX4_EN_CQ_STATENAPI_YIELD | MLX4_EN_CQ_STATEPOLL_YIELD) #define CQ_USER_PEND (MLX4_EN_CQ_STATEPOLL | MLX4_EN_CQ_STATEPOLL_YIELD) spinlock_t poll_lock; /* protects from LLS/napi conflicts */ #endif /* CONFIG_NET_RX_BUSY_POLL */ }; struct mlx4_en_port_profile { u32 flags; u32 tx_ring_num; u32 rx_ring_num; u32 tx_ring_size; u32 rx_ring_size; u8 rx_pause; u8 rx_ppp; u8 tx_pause; u8 tx_ppp; int rss_rings; }; struct mlx4_en_profile { int 
rss_xor; int udp_rss; u8 rss_mask; u32 active_ports; u32 small_pkt_int; u8 no_reset; u8 num_tx_rings_p_up; struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1]; }; struct mlx4_en_dev { struct mlx4_dev *dev; struct pci_dev *pdev; struct mutex state_lock; struct net_device *pndev[MLX4_MAX_PORTS + 1]; u32 port_cnt; bool device_up; struct mlx4_en_profile profile; u32 LSO_support; struct workqueue_struct *workqueue; struct device *dma_device; void __iomem *uar_map; struct mlx4_uar priv_uar; struct mlx4_mr mr; u32 priv_pdn; spinlock_t uar_lock; u8 mac_removed[MLX4_MAX_PORTS + 1]; unsigned long last_overflow_check; unsigned long overflow_period; }; struct mlx4_en_rss_map { int base_qpn; struct mlx4_qp qps[MAX_RX_RINGS]; enum mlx4_qp_state state[MAX_RX_RINGS]; struct mlx4_qp indir_qp; enum mlx4_qp_state indir_state; }; struct mlx4_en_port_state { int link_state; int link_speed; int transciver; int autoneg; }; enum mlx4_en_mclist_act { MCLIST_NONE, MCLIST_REM, MCLIST_ADD, }; struct mlx4_en_mc_list { struct list_head list; enum mlx4_en_mclist_act action; u8 addr[ETH_ALEN]; u64 reg_id; }; #ifdef CONFIG_MLX4_EN_DCB /* Minimal TC BW - setting to 0 will block traffic */ #define MLX4_EN_BW_MIN 1 #define MLX4_EN_BW_MAX 100 /* Utilize 100% of the line */ #define MLX4_EN_TC_ETS 7 #endif enum { MLX4_EN_FLAG_PROMISC = (1 << 0), MLX4_EN_FLAG_MC_PROMISC = (1 << 1), /* whether we need to enable hardware loopback by putting dmac * in Tx WQE */ MLX4_EN_FLAG_ENABLE_HW_LOOPBACK = (1 << 2), /* whether we need to drop packets that hardware loopback-ed */ MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3), MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4), #ifdef CONFIG_MLX4_EN_DCB MLX4_EN_FLAG_DCB_ENABLED = (1 << 5) #endif }; #define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE) #define MLX4_EN_MAC_HASH_IDX 5 struct en_port { struct kobject kobj; struct mlx4_dev *dev; u8 port_num; u8 vport_num; }; struct mlx4_en_frag_info { u16 frag_size; u16 frag_prefix_size; }; struct mlx4_en_priv { struct mlx4_en_dev *mdev; struct 
mlx4_en_port_profile *prof; struct net_device *dev; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx4_en_port_state port_state; spinlock_t stats_lock; /* To allow rules removal while port is going down */ struct list_head ethtool_list; unsigned long last_moder_packets[MAX_RX_RINGS]; unsigned long last_moder_tx_packets; unsigned long last_moder_bytes[MAX_RX_RINGS]; unsigned long last_moder_jiffies; int last_moder_time[MAX_RX_RINGS]; u16 rx_usecs; u16 rx_frames; u16 tx_usecs; u16 tx_frames; u32 pkt_rate_low; u32 rx_usecs_low; u32 pkt_rate_high; u32 rx_usecs_high; u32 sample_interval; u32 adaptive_rx_coal; u32 msg_enable; u32 loopback_ok; u32 validate_loopback; struct mlx4_hwq_resources res; int link_state; int last_link_state; bool port_up; int port; int registered; int allocated; int stride; unsigned char current_mac[ETH_ALEN + 2]; u64 mac; int mac_index; unsigned max_mtu; int base_qpn; int cqe_factor; struct mlx4_en_rss_map rss_map; __be32 ctrl_flags; u32 flags; u8 num_tx_rings_p_up; u32 tx_ring_num; u32 rx_ring_num; u32 rx_mb_size; struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; u16 rx_alloc_order; u32 rx_alloc_size; u32 rx_buf_size; u16 num_frags; u16 log_rx_info; struct mlx4_en_tx_ring **tx_ring; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; struct mlx4_en_cq **tx_cq; struct mlx4_en_cq *rx_cq[MAX_RX_RINGS]; struct mlx4_qp drop_qp; struct work_struct rx_mode_task; struct work_struct watchdog_task; struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; struct mlx4_en_flow_stats flowstats[MLX4_NUM_PRIORITIES]; struct mlx4_en_port_stats port_stats; struct mlx4_en_vport_stats vport_stats; struct mlx4_en_vf_stats vf_stats; DECLARE_BITMAP(stats_bitmap, NUM_ALL_STATS); struct list_head mc_list; struct list_head curr_list; u64 broadcast_id; struct mlx4_en_stat_out_mbox hw_stats; int vids[128]; bool wol; struct device *ddev; struct 
dentry *dev_root; u32 counter_index; eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; struct callout watchdog_timer; struct ifmedia media; volatile int blocked; struct sysctl_oid *sysctl; struct sysctl_ctx_list conf_ctx; struct sysctl_ctx_list stat_ctx; #define MLX4_EN_MAC_HASH_IDX 5 struct hlist_head mac_hash[MLX4_EN_MAC_HASH_SIZE]; #ifdef CONFIG_MLX4_EN_DCB struct ieee_ets ets; u16 maxrate[IEEE_8021QAZ_MAX_TCS]; u8 dcbx_cap; #endif #ifdef CONFIG_RFS_ACCEL spinlock_t filters_lock; int last_filter_id; struct list_head filters; struct hlist_head filter_hash[1 << MLX4_EN_FILTER_HASH_SHIFT]; #endif struct en_port *vf_ports[MLX4_MAX_NUM_VF]; unsigned long last_ifq_jiffies; u64 if_counters_rx_errors; u64 if_counters_rx_no_buffer; }; enum mlx4_en_wol { MLX4_EN_WOL_MAGIC = (1ULL << 61), MLX4_EN_WOL_ENABLED = (1ULL << 62), }; struct mlx4_mac_entry { struct hlist_node hlist; unsigned char mac[ETH_ALEN + 2]; u64 reg_id; }; #ifdef CONFIG_NET_RX_BUSY_POLL static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) { spin_lock_init(&cq->poll_lock); cq->state = MLX4_EN_CQ_STATEIDLE; } /* called from the device poll rutine to get ownership of a cq */ static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) { int rc = true; spin_lock(&cq->poll_lock); if (cq->state & MLX4_CQ_LOCKED) { WARN_ON(cq->state & MLX4_EN_CQ_STATENAPI); cq->state |= MLX4_EN_CQ_STATENAPI_YIELD; rc = false; } else /* we don't care if someone yielded */ cq->state = MLX4_EN_CQ_STATENAPI; spin_unlock(&cq->poll_lock); return rc; } /* returns true is someone tried to get the cq while napi had it */ static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) { int rc = false; spin_lock(&cq->poll_lock); WARN_ON(cq->state & (MLX4_EN_CQ_STATEPOLL | MLX4_EN_CQ_STATENAPI_YIELD)); if (cq->state & MLX4_EN_CQ_STATEPOLL_YIELD) rc = true; cq->state = MLX4_EN_CQ_STATEIDLE; spin_unlock(&cq->poll_lock); return rc; } /* called from mlx4_en_low_latency_poll() */ static inline bool 
mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) { int rc = true; spin_lock_bh(&cq->poll_lock); if ((cq->state & MLX4_CQ_LOCKED)) { struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; cq->state |= MLX4_EN_CQ_STATEPOLL_YIELD; rc = false; #ifdef LL_EXTENDED_STATS rx_ring->yields++; #endif } else /* preserve yield marks */ cq->state |= MLX4_EN_CQ_STATEPOLL; spin_unlock_bh(&cq->poll_lock); return rc; } /* returns true if someone tried to get the cq while it was locked */ static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) { int rc = false; spin_lock_bh(&cq->poll_lock); WARN_ON(cq->state & (MLX4_EN_CQ_STATENAPI)); if (cq->state & MLX4_EN_CQ_STATEPOLL_YIELD) rc = true; cq->state = MLX4_EN_CQ_STATEIDLE; spin_unlock_bh(&cq->poll_lock); return rc; } /* true if a socket is polling, even if it did not get the lock */ static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) { WARN_ON(!(cq->state & MLX4_CQ_LOCKED)); return cq->state & CQ_USER_PEND; } #else static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) { } static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) { return true; } static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) { return false; } static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) { return false; } static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) { return false; } static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) { return false; } #endif /* CONFIG_NET_RX_BUSY_POLL */ #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) void mlx4_en_destroy_netdev(struct net_device *dev); int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof); int mlx4_en_start_port(struct net_device *dev); void mlx4_en_stop_port(struct net_device *dev); void mlx4_en_free_resources(struct mlx4_en_priv *priv); int mlx4_en_alloc_resources(struct mlx4_en_priv *priv); int 
mlx4_en_pre_config(struct mlx4_en_priv *priv); int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq, int entries, int ring, enum cq_type mode, int node); void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq); int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int cq_idx); void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb); int mlx4_en_transmit(struct ifnet *dev, struct mbuf *m); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, u32 size, u16 stride, int node, int queue_idx); void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring); int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int cq, int user_prio); void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring); void mlx4_en_qflush(struct ifnet *dev); int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, int node); void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride); void mlx4_en_tx_que(void *context, int pending); void mlx4_en_rx_que(void *context, int pending); int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv); void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring); int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget); void mlx4_en_poll_tx_cq(unsigned long data); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, int user_prio, struct mlx4_qp_context *context); void mlx4_en_sqp_event(struct mlx4_qp *qp, enum 
mlx4_event event); int mlx4_en_map_buffer(struct mlx4_buf *buf); void mlx4_en_unmap_buffer(struct mlx4_buf *buf); void mlx4_en_calc_rx_buf(struct net_device *dev); int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv); int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv); void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv); int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring); void mlx4_en_rx_irq(struct mlx4_cq *mcq); int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode); int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv); int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset); int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port); int mlx4_en_get_vport_stats(struct mlx4_en_dev *mdev, u8 port); void mlx4_en_create_debug_files(struct mlx4_en_priv *priv); void mlx4_en_delete_debug_files(struct mlx4_en_priv *priv); int mlx4_en_register_debugfs(void); void mlx4_en_unregister_debugfs(void); #ifdef CONFIG_MLX4_EN_DCB extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops; extern const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops; #endif int mlx4_en_setup_tc(struct net_device *dev, u8 up); #ifdef CONFIG_RFS_ACCEL void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *rx_ring); #endif #define MLX4_EN_NUM_SELF_TEST 5 void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev); /* * Functions for time stamping */ #define SKBTX_HW_TSTAMP (1 << 0) #define SKBTX_IN_PROGRESS (1 << 2) u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe); /* Functions for caching and restoring statistics */ int mlx4_en_get_sset_count(struct net_device *dev, int sset); void mlx4_en_restore_ethtool_stats(struct mlx4_en_priv *priv, u64 *data); /* * Globals */ extern const struct ethtool_ops mlx4_en_ethtool_ops; /* * Defines for link speed - needed by 
selftest */ #define MLX4_EN_LINK_SPEED_1G 1000 #define MLX4_EN_LINK_SPEED_10G 10000 #define MLX4_EN_LINK_SPEED_40G 40000 enum { NETIF_MSG_DRV = 0x0001, NETIF_MSG_PROBE = 0x0002, NETIF_MSG_LINK = 0x0004, NETIF_MSG_TIMER = 0x0008, NETIF_MSG_IFDOWN = 0x0010, NETIF_MSG_IFUP = 0x0020, NETIF_MSG_RX_ERR = 0x0040, NETIF_MSG_TX_ERR = 0x0080, NETIF_MSG_TX_QUEUED = 0x0100, NETIF_MSG_INTR = 0x0200, NETIF_MSG_TX_DONE = 0x0400, NETIF_MSG_RX_STATUS = 0x0800, NETIF_MSG_PKTDATA = 0x1000, NETIF_MSG_HW = 0x2000, NETIF_MSG_WOL = 0x4000, }; /* * printk / logging functions */ #define en_print(level, priv, format, arg...) \ { \ if ((priv)->registered) \ printk(level "%s: %s: " format, DRV_NAME, \ (priv->dev)->if_xname, ## arg); \ else \ printk(level "%s: %s: Port %d: " format, \ DRV_NAME, dev_name(&priv->mdev->pdev->dev), \ (priv)->port, ## arg); \ } #define en_dbg(mlevel, priv, format, arg...) \ do { \ if (NETIF_MSG_##mlevel & priv->msg_enable) \ en_print(KERN_DEBUG, priv, format, ##arg); \ } while (0) #define en_warn(priv, format, arg...) \ en_print(KERN_WARNING, priv, format, ##arg) #define en_err(priv, format, arg...) \ en_print(KERN_ERR, priv, format, ##arg) #define en_info(priv, format, arg...) \ en_print(KERN_INFO, priv, format, ## arg) #define mlx4_err(mdev, format, arg...) \ pr_err("%s %s: " format, DRV_NAME, \ dev_name(&mdev->pdev->dev), ##arg) #define mlx4_info(mdev, format, arg...) \ pr_info("%s %s: " format, DRV_NAME, \ dev_name(&mdev->pdev->dev), ##arg) #define mlx4_warn(mdev, format, arg...) \ pr_warning("%s %s: " format, DRV_NAME, \ dev_name(&mdev->pdev->dev), ##arg) #endif Index: stable/9/sys/ofed/include/linux/mlx4/device.h =================================================================== --- stable/9/sys/ofed/include/linux/mlx4/device.h (revision 279733) +++ stable/9/sys/ofed/include/linux/mlx4/device.h (revision 279734) @@ -1,1286 +1,1297 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef MLX4_DEVICE_H #define MLX4_DEVICE_H #include #include #include #include #include #include #include #include #define MAX_MSIX_P_PORT 17 #define MAX_MSIX 64 #define MSIX_LEGACY_SZ 4 #define MIN_MSIX_P_PORT 5 #define MLX4_ROCE_MAX_GIDS 128 #define MLX4_ROCE_PF_GIDS 16 #define MLX4_NUM_UP 8 #define MLX4_NUM_TC 8 #define MLX4_MAX_100M_UNITS_VAL 255 /* * work around: can't set values * greater then this value when * using 100 Mbps units. 
*/ #define MLX4_RATELIMIT_100M_UNITS 3 /* 100 Mbps */ #define MLX4_RATELIMIT_1G_UNITS 4 /* 1 Gbps */ #define MLX4_RATELIMIT_DEFAULT 0x00ff #define CORE_CLOCK_MASK 0xffffffffffffULL enum { MLX4_FLAG_MSI_X = 1 << 0, MLX4_FLAG_OLD_PORT_CMDS = 1 << 1, MLX4_FLAG_MASTER = 1 << 2, MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, MLX4_FLAG_DEV_NUM_STR = 1 << 5, MLX4_FLAG_OLD_REG_MAC = 1 << 6, }; enum { MLX4_PORT_CAP_IS_SM = 1 << 1, MLX4_PORT_CAP_DEV_MGMT_SUP = 1 << 19, }; enum { MLX4_MAX_PORTS = 2, MLX4_MAX_PORT_PKEYS = 128 }; /* base qkey for use in sriov tunnel-qp/proxy-qp communication. * These qkeys must not be allowed for general use. This is a 64k range, * and to test for violation, we use the mask (protect against future chg). */ #define MLX4_RESERVED_QKEY_BASE (0xFFFF0000) #define MLX4_RESERVED_QKEY_MASK (0xFFFF0000) enum { MLX4_BOARD_ID_LEN = 64, MLX4_VSD_LEN = 208 }; enum { MLX4_MAX_NUM_PF = 16, MLX4_MAX_NUM_VF = 64, MLX4_MFUNC_MAX = 80, MLX4_MAX_EQ_NUM = 1024, MLX4_MFUNC_EQ_NUM = 4, MLX4_MFUNC_MAX_EQES = 8, MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1) }; /* Driver supports 3 diffrent device methods to manage traffic steering: * -device managed - High level API for ib and eth flow steering. FW is * managing flow steering tables. * - B0 steering mode - Common low level API for ib and (if supported) eth. * - A0 steering mode - Limited low level API for eth. In case of IB, * B0 mode is in use. 
*/ enum { MLX4_STEERING_MODE_A0, MLX4_STEERING_MODE_B0, MLX4_STEERING_MODE_DEVICE_MANAGED }; static inline const char *mlx4_steering_mode_str(int steering_mode) { switch (steering_mode) { case MLX4_STEERING_MODE_A0: return "A0 steering"; case MLX4_STEERING_MODE_B0: return "B0 steering"; case MLX4_STEERING_MODE_DEVICE_MANAGED: return "Device managed flow steering"; default: return "Unrecognize steering mode"; } } enum { MLX4_DEV_CAP_FLAG_RC = 1LL << 0, MLX4_DEV_CAP_FLAG_UC = 1LL << 1, MLX4_DEV_CAP_FLAG_UD = 1LL << 2, MLX4_DEV_CAP_FLAG_XRC = 1LL << 3, MLX4_DEV_CAP_FLAG_SRQ = 1LL << 6, MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1LL << 7, MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, MLX4_DEV_CAP_FLAG_DPDP = 1LL << 12, MLX4_DEV_CAP_FLAG_BLH = 1LL << 15, MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1LL << 16, MLX4_DEV_CAP_FLAG_APM = 1LL << 17, MLX4_DEV_CAP_FLAG_ATOMIC = 1LL << 18, MLX4_DEV_CAP_FLAG_RAW_MCAST = 1LL << 19, MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1LL << 20, MLX4_DEV_CAP_FLAG_UD_MCAST = 1LL << 21, MLX4_DEV_CAP_FLAG_IBOE = 1LL << 30, MLX4_DEV_CAP_FLAG_UC_LOOPBACK = 1LL << 32, MLX4_DEV_CAP_FLAG_FCS_KEEP = 1LL << 34, MLX4_DEV_CAP_FLAG_WOL_PORT1 = 1LL << 37, MLX4_DEV_CAP_FLAG_WOL_PORT2 = 1LL << 38, MLX4_DEV_CAP_FLAG_UDP_RSS = 1LL << 40, MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41, MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42, MLX4_DEV_CAP_FLAG_CROSS_CHANNEL = 1LL << 44, MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48, MLX4_DEV_CAP_FLAG_COUNTERS_EXT = 1LL << 49, MLX4_DEV_CAP_FLAG_SET_PORT_ETH_SCHED = 1LL << 53, MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55, MLX4_DEV_CAP_FLAG_FAST_DROP = 1LL << 57, MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59, MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61, MLX4_DEV_CAP_FLAG_64B_CQE = 1LL << 62 }; enum { MLX4_DEV_CAP_FLAG2_RSS = 1LL << 0, MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1, MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2, MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3, MLX4_DEV_CAP_FLAG2_FSM = 1LL << 4, MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 5, 
MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 6, MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1LL << 7, MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 8, MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 9, MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP = 1LL << 10, MLX4_DEV_CAP_FLAG2_FLOWSTATS_EN = 1LL << 11, MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 12, - MLX4_DEV_CAP_FLAG2_TS = 1LL << 13, - MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW = 1LL << 14 + MLX4_DEV_CAP_FLAG2_TS = 1LL << 13, + MLX4_DEV_CAP_FLAG2_DRIVER_VERSION_TO_FW = 1LL << 14, + MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN = 1LL << 15, + MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS = 1LL << 16, + MLX4_DEV_CAP_FLAG2_FS_EN_NCSI = 1LL << 17, + MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18, + MLX4_DEV_CAP_FLAG2_DMFS_TAG_MODE = 1LL << 19, + MLX4_DEV_CAP_FLAG2_ROCEV2 = 1LL << 20, + MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL = 1LL << 21, + MLX4_DEV_CAP_FLAG2_CQE_STRIDE = 1LL << 22, + MLX4_DEV_CAP_FLAG2_EQE_STRIDE = 1LL << 23, + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1LL << 24, + MLX4_DEV_CAP_FLAG2_RX_CSUM_MODE = 1LL << 25, }; /* bit enums for an 8-bit flags field indicating special use * QPs which require special handling in qp_reserve_range. * Currently, this only includes QPs used by the ETH interface, * where we expect to use blueflame. These QPs must not have * bits 6 and 7 set in their qp number. * * This enum may use only bits 0..7. 
*/ enum { MLX4_RESERVE_BF_QP = 1 << 7, }; enum { MLX4_DEV_CAP_CQ_FLAG_IO = 1 << 0 }; enum { MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0, MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1 }; enum { MLX4_USER_DEV_CAP_64B_CQE = 1L << 0 }; enum { MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0 }; #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { MLX4_BMME_FLAG_WIN_TYPE_2B = 1 << 1, MLX4_BMME_FLAG_LOCAL_INV = 1 << 6, MLX4_BMME_FLAG_REMOTE_INV = 1 << 7, MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, }; enum mlx4_event { MLX4_EVENT_TYPE_COMP = 0x00, MLX4_EVENT_TYPE_PATH_MIG = 0x01, MLX4_EVENT_TYPE_COMM_EST = 0x02, MLX4_EVENT_TYPE_SQ_DRAINED = 0x03, MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, MLX4_EVENT_TYPE_SRQ_LIMIT = 0x14, MLX4_EVENT_TYPE_CQ_ERROR = 0x04, MLX4_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, MLX4_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, MLX4_EVENT_TYPE_PATH_MIG_FAILED = 0x07, MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, MLX4_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, MLX4_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, MLX4_EVENT_TYPE_PORT_CHANGE = 0x09, MLX4_EVENT_TYPE_EQ_OVERFLOW = 0x0f, MLX4_EVENT_TYPE_ECC_DETECT = 0x0e, MLX4_EVENT_TYPE_CMD = 0x0a, MLX4_EVENT_TYPE_VEP_UPDATE = 0x19, MLX4_EVENT_TYPE_COMM_CHANNEL = 0x18, MLX4_EVENT_TYPE_OP_REQUIRED = 0x1a, MLX4_EVENT_TYPE_FATAL_WARNING = 0x1b, MLX4_EVENT_TYPE_FLR_EVENT = 0x1c, MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT = 0x1d, MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT = 0x3e, MLX4_EVENT_TYPE_NONE = 0xff, }; enum { MLX4_PORT_CHANGE_SUBTYPE_DOWN = 1, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE = 4 }; enum { MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE = 1, MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE = 2, }; enum { MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0, }; enum slave_port_state { SLAVE_PORT_DOWN = 0, SLAVE_PENDING_UP, SLAVE_PORT_UP, }; enum slave_port_gen_event { SLAVE_PORT_GEN_EVENT_DOWN = 0, SLAVE_PORT_GEN_EVENT_UP, SLAVE_PORT_GEN_EVENT_NONE, }; enum 
slave_port_state_event { MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, MLX4_PORT_STATE_DEV_EVENT_PORT_UP, MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID, MLX4_PORT_STATE_IB_EVENT_GID_INVALID, }; enum { MLX4_PERM_LOCAL_READ = 1 << 10, MLX4_PERM_LOCAL_WRITE = 1 << 11, MLX4_PERM_REMOTE_READ = 1 << 12, MLX4_PERM_REMOTE_WRITE = 1 << 13, MLX4_PERM_ATOMIC = 1 << 14, MLX4_PERM_BIND_MW = 1 << 15, }; enum { MLX4_OPCODE_NOP = 0x00, MLX4_OPCODE_SEND_INVAL = 0x01, MLX4_OPCODE_RDMA_WRITE = 0x08, MLX4_OPCODE_RDMA_WRITE_IMM = 0x09, MLX4_OPCODE_SEND = 0x0a, MLX4_OPCODE_SEND_IMM = 0x0b, MLX4_OPCODE_LSO = 0x0e, MLX4_OPCODE_RDMA_READ = 0x10, MLX4_OPCODE_ATOMIC_CS = 0x11, MLX4_OPCODE_ATOMIC_FA = 0x12, MLX4_OPCODE_MASKED_ATOMIC_CS = 0x14, MLX4_OPCODE_MASKED_ATOMIC_FA = 0x15, MLX4_OPCODE_BIND_MW = 0x18, MLX4_OPCODE_FMR = 0x19, MLX4_OPCODE_LOCAL_INVAL = 0x1b, MLX4_OPCODE_CONFIG_CMD = 0x1f, MLX4_RECV_OPCODE_RDMA_WRITE_IMM = 0x00, MLX4_RECV_OPCODE_SEND = 0x01, MLX4_RECV_OPCODE_SEND_IMM = 0x02, MLX4_RECV_OPCODE_SEND_INVAL = 0x03, MLX4_CQE_OPCODE_ERROR = 0x1e, MLX4_CQE_OPCODE_RESIZE = 0x16, }; enum { MLX4_STAT_RATE_OFFSET = 5 }; enum mlx4_protocol { MLX4_PROT_IB_IPV6 = 0, MLX4_PROT_ETH, MLX4_PROT_IB_IPV4, MLX4_PROT_FCOE }; enum { MLX4_MTT_FLAG_PRESENT = 1 }; enum { MLX4_MAX_MTT_SHIFT = 31 }; enum mlx4_qp_region { MLX4_QP_REGION_FW = 0, MLX4_QP_REGION_ETH_ADDR, MLX4_QP_REGION_FC_ADDR, MLX4_QP_REGION_FC_EXCH, MLX4_NUM_QP_REGION }; enum mlx4_port_type { MLX4_PORT_TYPE_NONE = 0, MLX4_PORT_TYPE_IB = 1, MLX4_PORT_TYPE_ETH = 2, MLX4_PORT_TYPE_AUTO = 3, MLX4_PORT_TYPE_NA = 4 }; enum mlx4_special_vlan_idx { MLX4_NO_VLAN_IDX = 0, MLX4_VLAN_MISS_IDX, MLX4_VLAN_REGULAR }; enum mlx4_steer_type { MLX4_MC_STEER = 0, MLX4_UC_STEER, MLX4_NUM_STEERS }; enum { MLX4_NUM_FEXCH = 64 * 1024, }; enum { MLX4_MAX_FAST_REG_PAGES = 511, }; enum { MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14, MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15, MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE = 0x16, }; /* Port mgmt change event handling */ enum { 
MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK = 1 << 0, MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK = 1 << 1, MLX4_EQ_PORT_INFO_LID_CHANGE_MASK = 1 << 2, MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK = 1 << 3, MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor) { return (major << 32) | (minor << 16) | subminor; } struct mlx4_phys_caps { u32 gid_phys_table_len[MLX4_MAX_PORTS + 1]; u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1]; u32 num_phys_eqs; u32 base_sqpn; u32 base_proxy_sqpn; u32 base_tunnel_sqpn; }; struct mlx4_caps { u64 fw_ver; u32 function; int num_ports; int vl_cap[MLX4_MAX_PORTS + 1]; int ib_mtu_cap[MLX4_MAX_PORTS + 1]; __be32 ib_port_def_cap[MLX4_MAX_PORTS + 1]; u64 def_mac[MLX4_MAX_PORTS + 1]; int eth_mtu_cap[MLX4_MAX_PORTS + 1]; int gid_table_len[MLX4_MAX_PORTS + 1]; int pkey_table_len[MLX4_MAX_PORTS + 1]; int trans_type[MLX4_MAX_PORTS + 1]; int vendor_oui[MLX4_MAX_PORTS + 1]; int wavelength[MLX4_MAX_PORTS + 1]; u64 trans_code[MLX4_MAX_PORTS + 1]; int local_ca_ack_delay; int num_uars; u32 uar_page_size; int bf_reg_size; int bf_regs_per_page; int max_sq_sg; int max_rq_sg; int num_qps; int max_wqes; int max_sq_desc_sz; int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; u32 *qp0_proxy; u32 *qp1_proxy; u32 *qp0_tunnel; u32 *qp1_tunnel; int num_srqs; int max_srq_wqes; int max_srq_sge; int reserved_srqs; int num_cqs; int max_cqes; int reserved_cqs; int num_eqs; int reserved_eqs; int num_comp_vectors; int comp_pool; int num_mpts; int max_fmr_maps; u64 num_mtts; int fmr_reserved_mtts; int reserved_mtts; int reserved_mrws; int reserved_uars; int num_mgms; int num_amgms; int reserved_mcgs; int num_qp_per_mgm; int steering_mode; int num_pds; int reserved_pds; int max_xrcds; int reserved_xrcds; int mtt_entry_sz; u32 max_msg_sz; u32 page_size_cap; u64 flags; u64 flags2; u32 bmme_flags; u32 
reserved_lkey; u16 stat_rate_support; u8 cq_timestamp; u8 port_width_cap[MLX4_MAX_PORTS + 1]; int max_gso_sz; int max_rss_tbl_sz; int reserved_qps_cnt[MLX4_NUM_QP_REGION]; int reserved_qps; int reserved_qps_base[MLX4_NUM_QP_REGION]; int log_num_macs; int log_num_vlans; enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; u8 supported_type[MLX4_MAX_PORTS + 1]; u8 suggested_type[MLX4_MAX_PORTS + 1]; u8 default_sense[MLX4_MAX_PORTS + 1]; u32 port_mask[MLX4_MAX_PORTS + 1]; enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1]; u32 max_counters; u8 port_ib_mtu[MLX4_MAX_PORTS + 1]; u16 sqp_demux; u32 sync_qp; u32 cq_flags; u32 eqe_size; u32 cqe_size; u8 eqe_factor; u32 userspace_caps; /* userspace must be aware to */ u32 function_caps; /* functions must be aware to */ u8 fast_drop; u16 hca_core_clock; u32 max_basic_counters; u32 max_extended_counters; u8 def_counter_index[MLX4_MAX_PORTS + 1]; }; struct mlx4_buf_list { void *buf; dma_addr_t map; }; struct mlx4_buf { struct mlx4_buf_list direct; struct mlx4_buf_list *page_list; int nbufs; int npages; int page_shift; }; struct mlx4_mtt { u32 offset; int order; int page_shift; }; enum { MLX4_DB_PER_PAGE = PAGE_SIZE / 4 }; struct mlx4_db_pgdir { struct list_head list; DECLARE_BITMAP(order0, MLX4_DB_PER_PAGE); DECLARE_BITMAP(order1, MLX4_DB_PER_PAGE / 2); unsigned long *bits[2]; __be32 *db_page; dma_addr_t db_dma; }; struct mlx4_ib_user_db_page; struct mlx4_db { __be32 *db; union { struct mlx4_db_pgdir *pgdir; struct mlx4_ib_user_db_page *user_page; } u; dma_addr_t dma; int index; int order; }; struct mlx4_hwq_resources { struct mlx4_db db; struct mlx4_mtt mtt; struct mlx4_buf buf; }; struct mlx4_mr { struct mlx4_mtt mtt; u64 iova; u64 size; u32 key; u32 pd; u32 access; int enabled; }; enum mlx4_mw_type { MLX4_MW_TYPE_1 = 1, MLX4_MW_TYPE_2 = 2, }; struct mlx4_mw { u32 key; u32 pd; enum mlx4_mw_type type; int enabled; }; struct mlx4_fmr { struct mlx4_mr mr; struct mlx4_mpt_entry *mpt; __be64 *mtts; dma_addr_t dma_handle; int 
max_pages; int max_maps; int maps; u8 page_shift; }; struct mlx4_uar { unsigned long pfn; int index; struct list_head bf_list; unsigned free_bf_bmap; void __iomem *map; void __iomem *bf_map; }; struct mlx4_bf { unsigned long offset; int buf_size; struct mlx4_uar *uar; void __iomem *reg; }; struct mlx4_cq { void (*comp) (struct mlx4_cq *); void (*event) (struct mlx4_cq *, enum mlx4_event); struct mlx4_uar *uar; u32 cons_index; __be32 *set_ci_db; __be32 *arm_db; int arm_sn; int cqn; unsigned vector; atomic_t refcount; struct completion free; int eqn; u16 irq; }; struct mlx4_qp { void (*event) (struct mlx4_qp *, enum mlx4_event); int qpn; atomic_t refcount; struct completion free; }; struct mlx4_srq { void (*event) (struct mlx4_srq *, enum mlx4_event); int srqn; int max; int max_gs; int wqe_shift; atomic_t refcount; struct completion free; }; struct mlx4_av { __be32 port_pd; u8 reserved1; u8 g_slid; __be16 dlid; u8 reserved2; u8 gid_index; u8 stat_rate; u8 hop_limit; __be32 sl_tclass_flowlabel; u8 dgid[16]; }; struct mlx4_eth_av { __be32 port_pd; u8 reserved1; u8 smac_idx; u16 reserved2; u8 reserved3; u8 gid_index; u8 stat_rate; u8 hop_limit; __be32 sl_tclass_flowlabel; u8 dgid[16]; u8 s_mac[6]; u8 reserved4[2]; __be16 vlan; u8 mac[6]; }; union mlx4_ext_av { struct mlx4_av ib; struct mlx4_eth_av eth; }; struct mlx4_if_stat_control { u8 reserved1[3]; /* Extended counters enabled */ u8 cnt_mode; /* Number of interfaces */ __be32 num_of_if; __be32 reserved[2]; }; struct mlx4_if_stat_basic { struct mlx4_if_stat_control control; struct { __be64 IfRxFrames; __be64 IfRxOctets; __be64 IfTxFrames; __be64 IfTxOctets; } counters[]; }; #define MLX4_IF_STAT_BSC_SZ(ports)(sizeof(struct mlx4_if_stat_extended) +\ sizeof(((struct mlx4_if_stat_extended *)0)->\ counters[0]) * ports) struct mlx4_if_stat_extended { struct mlx4_if_stat_control control; struct { __be64 IfRxUnicastFrames; __be64 IfRxUnicastOctets; __be64 IfRxMulticastFrames; __be64 IfRxMulticastOctets; __be64 
IfRxBroadcastFrames; __be64 IfRxBroadcastOctets; __be64 IfRxNoBufferFrames; __be64 IfRxNoBufferOctets; __be64 IfRxErrorFrames; __be64 IfRxErrorOctets; __be32 reserved[39]; __be64 IfTxUnicastFrames; __be64 IfTxUnicastOctets; __be64 IfTxMulticastFrames; __be64 IfTxMulticastOctets; __be64 IfTxBroadcastFrames; __be64 IfTxBroadcastOctets; __be64 IfTxDroppedFrames; __be64 IfTxDroppedOctets; __be64 IfTxRequestedFramesSent; __be64 IfTxGeneratedFramesSent; __be64 IfTxTsoOctets; } __packed counters[]; }; #define MLX4_IF_STAT_EXT_SZ(ports) (sizeof(struct mlx4_if_stat_extended) +\ sizeof(((struct mlx4_if_stat_extended *)\ 0)->counters[0]) * ports) union mlx4_counter { struct mlx4_if_stat_control control; struct mlx4_if_stat_basic basic; struct mlx4_if_stat_extended ext; }; #define MLX4_IF_STAT_SZ(ports) MLX4_IF_STAT_EXT_SZ(ports) struct mlx4_quotas { int qp; int cq; int srq; int mpt; int mtt; int counter; int xrcd; }; struct mlx4_dev { struct pci_dev *pdev; unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; struct mlx4_phys_caps phys_caps; struct mlx4_quotas quotas; struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; u16 vsd_vendor_id; char vsd[MLX4_VSD_LEN]; int num_vfs; int numa_node; int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; }; struct mlx4_clock_params { u64 offset; u8 bar; u8 size; }; struct mlx4_eqe { u8 reserved1; u8 type; u8 reserved2; u8 subtype; union { u32 raw[6]; struct { __be32 cqn; } __packed comp; struct { u16 reserved1; __be16 token; u32 reserved2; u8 reserved3[3]; u8 status; __be64 out_param; } __packed cmd; struct { __be32 qpn; } __packed qp; struct { __be32 srqn; } __packed srq; struct { __be32 cqn; u32 reserved1; u8 reserved2[3]; u8 syndrome; } __packed cq_err; struct { u32 reserved1[2]; __be32 port; } __packed port_change; struct { #define COMM_CHANNEL_BIT_ARRAY_SIZE 4 u32 reserved; u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE]; } 
__packed comm_channel_arm; struct { u8 port; u8 reserved[3]; __be64 mac; } __packed mac_update; struct { __be32 slave_id; } __packed flr_event; struct { __be16 current_temperature; __be16 warning_threshold; } __packed warming; struct { u8 reserved[3]; u8 port; union { struct { __be16 mstr_sm_lid; __be16 port_lid; __be32 changed_attr; u8 reserved[3]; u8 mstr_sm_sl; __be64 gid_prefix; } __packed port_info; struct { __be32 block_ptr; __be32 tbl_entries_mask; } __packed tbl_change_info; } params; } __packed port_mgmt_change; struct { u8 reserved[3]; u8 port; u32 reserved1[5]; } __packed bad_cable; } event; u8 slave_id; u8 reserved3[2]; u8 owner; } __packed; struct mlx4_init_port_param { int set_guid0; int set_node_guid; int set_si_guid; u16 mtu; int port_width_cap; u16 vl_cap; u16 max_gid; u16 max_pkey; u64 guid0; u64 node_guid; u64 si_guid; }; #define mlx4_foreach_port(port, dev, type) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if ((type) == (dev)->caps.port_mask[(port)]) #define mlx4_foreach_non_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB)) #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) #define MLX4_INVALID_SLAVE_ID 0xFF #define MLX4_SINK_COUNTER_INDEX 0xff void handle_port_mgmt_change_event(struct work_struct *work); static inline int mlx4_master_func_num(struct mlx4_dev *dev) { return dev->caps.function; } static inline int mlx4_is_master(struct mlx4_dev *dev) { return dev->flags & MLX4_FLAG_MASTER; } static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev) { return dev->phys_caps.base_sqpn + 8 + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev); } static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { return (qpn < dev->phys_caps.base_sqpn + 8 + 16 * 
MLX4_MFUNC_MAX * !!mlx4_is_master(dev)); } static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn) { int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8; if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8) return 1; return 0; } static inline int mlx4_is_mfunc(struct mlx4_dev *dev) { return dev->flags & (MLX4_FLAG_SLAVE | MLX4_FLAG_MASTER); } static inline int mlx4_is_slave(struct mlx4_dev *dev) { return dev->flags & MLX4_FLAG_SLAVE; } int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, struct mlx4_buf *buf); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { if (BITS_PER_LONG == 64 || buf->nbufs == 1) - return buf->direct.buf + offset; + return (u8 *)buf->direct.buf + offset; else - return buf->page_list[offset >> PAGE_SHIFT].buf + + return (u8 *)buf->page_list[offset >> PAGE_SHIFT].buf + (offset & (PAGE_SIZE - 1)); } int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn); void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn); int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar); void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar); int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node); void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf); int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift, struct mlx4_mtt *mtt); void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt); u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt); int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr); int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr); int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, 
struct mlx4_mw *mw); void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_buf *buf); int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, int size, int max_direct); void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres, int size); int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, unsigned vector, int collapsed, int timestamp_en); void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq); int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base, u8 flags); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn, struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq); void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq); int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark); int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark); int mlx4_INIT_PORT(struct mlx4_dev *dev, int port); int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], int block_mcast_loopback, enum mlx4_protocol prot); int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol prot); int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], u8 port, int 
block_mcast_loopback, enum mlx4_protocol protocol, u64 *reg_id); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], enum mlx4_protocol protocol, u64 reg_id); enum { MLX4_DOMAIN_UVERBS = 0x1000, MLX4_DOMAIN_ETHTOOL = 0x2000, MLX4_DOMAIN_RFS = 0x3000, MLX4_DOMAIN_NIC = 0x5000, }; enum mlx4_net_trans_rule_id { MLX4_NET_TRANS_RULE_ID_ETH = 0, MLX4_NET_TRANS_RULE_ID_IB, MLX4_NET_TRANS_RULE_ID_IPV6, MLX4_NET_TRANS_RULE_ID_IPV4, MLX4_NET_TRANS_RULE_ID_TCP, MLX4_NET_TRANS_RULE_ID_UDP, MLX4_NET_TRANS_RULE_NUM, /* should be last */ MLX4_NET_TRANS_RULE_DUMMY = -1, /* force enum to be signed */ }; extern const u16 __sw_id_hw[]; static inline int map_hw_to_sw_id(u16 header_id) { int i; for (i = 0; i < MLX4_NET_TRANS_RULE_NUM; i++) { if (header_id == __sw_id_hw[i]) return i; } return -EINVAL; } enum mlx4_net_trans_promisc_mode { MLX4_FS_REGULAR = 1, MLX4_FS_ALL_DEFAULT, MLX4_FS_MC_DEFAULT, MLX4_FS_UC_SNIFFER, MLX4_FS_MC_SNIFFER, MLX4_FS_MODE_NUM, /* should be last */ MLX4_FS_MODE_DUMMY = -1, /* force enum to be signed */ }; struct mlx4_spec_eth { u8 dst_mac[6]; u8 dst_mac_msk[6]; u8 src_mac[6]; u8 src_mac_msk[6]; u8 ether_type_enable; __be16 ether_type; __be16 vlan_id_msk; __be16 vlan_id; }; struct mlx4_spec_tcp_udp { __be16 dst_port; __be16 dst_port_msk; __be16 src_port; __be16 src_port_msk; }; struct mlx4_spec_ipv4 { __be32 dst_ip; __be32 dst_ip_msk; __be32 src_ip; __be32 src_ip_msk; }; struct mlx4_spec_ib { __be32 l3_qpn; __be32 qpn_msk; u8 dst_gid[16]; u8 dst_gid_msk[16]; }; struct mlx4_spec_list { struct list_head list; enum mlx4_net_trans_rule_id id; union { struct mlx4_spec_eth eth; struct mlx4_spec_ib ib; struct mlx4_spec_ipv4 ipv4; struct mlx4_spec_tcp_udp tcp_udp; }; }; enum mlx4_net_trans_hw_rule_queue { MLX4_NET_TRANS_Q_FIFO, MLX4_NET_TRANS_Q_LIFO, }; struct mlx4_net_trans_rule { struct list_head list; enum mlx4_net_trans_hw_rule_queue queue_mode; bool exclusive; bool allow_loopback; enum mlx4_net_trans_promisc_mode promisc_mode; u8 port; 
u16 priority; u32 qpn; }; struct mlx4_net_trans_rule_hw_ctrl { __be16 prio; u8 type; u8 flags; u8 rsvd1; u8 funcid; u8 vep; u8 port; __be32 qpn; __be32 rsvd2; }; struct mlx4_net_trans_rule_hw_ib { u8 size; u8 rsvd1; __be16 id; u32 rsvd2; __be32 l3_qpn; __be32 qpn_mask; u8 dst_gid[16]; u8 dst_gid_msk[16]; } __packed; struct mlx4_net_trans_rule_hw_eth { u8 size; u8 rsvd; __be16 id; u8 rsvd1[6]; u8 dst_mac[6]; u16 rsvd2; u8 dst_mac_msk[6]; u16 rsvd3; u8 src_mac[6]; u16 rsvd4; u8 src_mac_msk[6]; u8 rsvd5; u8 ether_type_enable; __be16 ether_type; __be16 vlan_tag_msk; __be16 vlan_tag; } __packed; struct mlx4_net_trans_rule_hw_tcp_udp { u8 size; u8 rsvd; __be16 id; __be16 rsvd1[3]; __be16 dst_port; __be16 rsvd2; __be16 dst_port_msk; __be16 rsvd3; __be16 src_port; __be16 rsvd4; __be16 src_port_msk; } __packed; struct mlx4_net_trans_rule_hw_ipv4 { u8 size; u8 rsvd; __be16 id; __be32 rsvd1; __be32 dst_ip; __be32 dst_ip_msk; __be32 src_ip; __be32 src_ip_msk; } __packed; struct _rule_hw { union { struct { u8 size; u8 rsvd; __be16 id; }; struct mlx4_net_trans_rule_hw_eth eth; struct mlx4_net_trans_rule_hw_ib ib; struct mlx4_net_trans_rule_hw_ipv4 ipv4; struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp; }; }; int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, enum mlx4_net_trans_promisc_mode mode); int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port, enum mlx4_net_trans_promisc_mode mode); int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port); int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port); int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); void 
mlx4_set_stats_bitmap(struct mlx4_dev *dev, unsigned long *stats_bitmap); int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu, u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx); int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn, u8 promisc); int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, u8 *pg, u16 *ratelimit); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, int npages, u64 iova, u32 *lkey, u32 *rkey); int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, int max_maps, u8 page_shift, struct mlx4_fmr *fmr); int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr); void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_query_diag_counters(struct mlx4_dev *mlx4_dev, int array_length, u8 op_modifier, u32 in_offset[], u32 counter_out[]); int mlx4_test_interrupts(struct mlx4_dev *dev); int mlx4_assign_eq(struct mlx4_dev *dev, char* name, int * vector); void mlx4_release_eq(struct mlx4_dev *dev, int vec); int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port); int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port); int mlx4_counter_alloc(struct mlx4_dev *dev, u8 port, u32 *idx); void mlx4_counter_free(struct mlx4_dev *dev, u8 port, u32 idx); int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); int map_sw_to_hw_steering_mode(struct mlx4_dev *dev, enum mlx4_net_trans_promisc_mode flow_type); int map_sw_to_hw_steering_id(struct 
mlx4_dev *dev, enum mlx4_net_trans_rule_id id); int hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id); void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val); int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); int mlx4_is_slave_active(struct mlx4_dev *dev, int slave); int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port); int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port); int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr, u16 lid, u8 sl); int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change); enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port); int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event); void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id); int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid); int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn); int mlx4_read_clock(struct mlx4_dev *dev); int mlx4_get_internal_clock_params(struct mlx4_dev *dev, struct mlx4_clock_params *params); #endif /* MLX4_DEVICE_H */ Index: stable/9/sys/ofed/include/linux/mlx4/qp.h =================================================================== --- stable/9/sys/ofed/include/linux/mlx4/qp.h (revision 279733) +++ stable/9/sys/ofed/include/linux/mlx4/qp.h (revision 279734) @@ -1,445 +1,447 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #ifndef MLX4_QP_H #define MLX4_QP_H /* NOTE(review): the two #include directives that follow carry no header names in this copy. */#include #include #define MLX4_INVALID_LKEY 0x100 /* Three single-bit flags (bits 28-30) plus IB_M_QP_MOD_VEND_MASK, which is the OR of all three. */enum ib_m_qp_attr_mask { IB_M_EXT_CLASS_1 = 1 << 28, IB_M_EXT_CLASS_2 = 1 << 29, IB_M_EXT_CLASS_3 = 1 << 30, IB_M_QP_MOD_VEND_MASK = (IB_M_EXT_CLASS_1 | IB_M_EXT_CLASS_2 | IB_M_EXT_CLASS_3) }; /* Single-bit flags; mlx4_qp_modify() takes a value of this type as its optpar argument. Bits 11, 15 and 17-19 are not defined here. */enum mlx4_qp_optpar { MLX4_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, MLX4_QP_OPTPAR_RRE = 1 << 1, MLX4_QP_OPTPAR_RAE = 1 << 2, MLX4_QP_OPTPAR_RWE = 1 << 3, MLX4_QP_OPTPAR_PKEY_INDEX = 1 << 4, MLX4_QP_OPTPAR_Q_KEY = 1 << 5, MLX4_QP_OPTPAR_RNR_TIMEOUT = 1 << 6, MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7, MLX4_QP_OPTPAR_SRA_MAX = 1 << 8, MLX4_QP_OPTPAR_RRA_MAX = 1 << 9, MLX4_QP_OPTPAR_PM_STATE = 1 << 10, MLX4_QP_OPTPAR_RETRY_COUNT = 1 << 12, MLX4_QP_OPTPAR_RNR_RETRY = 1 << 13, MLX4_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, MLX4_QP_OPTPAR_SCHED_QUEUE = 1 << 16, MLX4_QP_OPTPAR_COUNTER_INDEX = 1 << 20 }; /* QP state codes 0-7, used for the cur_state/new_state arguments of mlx4_qp_modify(); MLX4_QP_NUM_STATE follows 7 and therefore evaluates to 8. */enum mlx4_qp_state { MLX4_QP_STATE_RST = 0, MLX4_QP_STATE_INIT = 1, MLX4_QP_STATE_RTR = 2, MLX4_QP_STATE_RTS = 3, MLX4_QP_STATE_SQER = 4, MLX4_QP_STATE_SQD = 5, MLX4_QP_STATE_ERR = 6, MLX4_QP_STATE_SQ_DRAINING = 7, MLX4_QP_NUM_STATE }; enum { MLX4_QP_ST_RC = 0x0, MLX4_QP_ST_UC = 0x1, MLX4_QP_ST_RD = 0x2, MLX4_QP_ST_UD = 0x3, MLX4_QP_ST_XRC = 0x6, MLX4_QP_ST_MLX = 0x7 }; enum { MLX4_QP_PM_MIGRATED = 0x3, MLX4_QP_PM_ARMED = 0x0, MLX4_QP_PM_REARM = 0x1 }; /* Bit flags grouped by the field named in each inner comment; the params1 and params2 groups both use bits 15, 14 and 13. */enum { /* params1 */ MLX4_QP_BIT_SRE = 1 << 15, MLX4_QP_BIT_SWE = 1 << 14, MLX4_QP_BIT_SAE = 1 << 13, /* params2 */ MLX4_QP_BIT_RRE = 1 << 15, MLX4_QP_BIT_RWE = 1 << 14, MLX4_QP_BIT_RAE = 1 << 13, MLX4_QP_BIT_RIC = 1 << 4, MLX4_QP_BIT_COLL_SYNC_RQ = 1 << 2, MLX4_QP_BIT_COLL_SYNC_SQ = 1 << 1, MLX4_QP_BIT_COLL_MASTER = 1 << 0 }; /* MLX4_RSS_HASH_XOR and MLX4_RSS_HASH_TOP are plain values (0, 1); the other MLX4_RSS_* protocol entries are single-bit flags. Presumably these go in mlx4_rss_context.hash_fn and mlx4_rss_context.flags respectively -- TODO confirm. */enum { MLX4_RSS_HASH_XOR = 0, MLX4_RSS_HASH_TOP = 1, MLX4_RSS_UDP_IPV6 = 1 << 0, MLX4_RSS_UDP_IPV4 = 1 << 1, MLX4_RSS_TCP_IPV6 = 1 << 2, MLX4_RSS_IPV6 = 1 << 3, MLX4_RSS_TCP_IPV4 = 1 << 4, MLX4_RSS_IPV4 = 1 << 5, /* offset of mlx4_rss_context within mlx4_qp_context.pri_path */ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH = 0x24, /* offset of being RSS
indirection QP within mlx4_qp_context.flags */ MLX4_RSS_QPC_FLAG_OFFSET = 13, }; struct mlx4_rss_context { __be32 base_qpn; __be32 default_qpn; u16 reserved; u8 hash_fn; u8 flags; __be32 rss_key[10]; __be32 base_qpn_udp; }; /* Address-path layout; struct mlx4_qp_context embeds it twice, as pri_path and alt_path. Several unnamed enums in this header are tagged with the member they apply to (fl, vlan_control, feup, fvl_rx). */struct mlx4_qp_path { u8 fl; u8 vlan_control; u8 disable_pkey_check; u8 pkey_index; u8 counter_index; u8 grh_mylmc; __be16 rlid; u8 ackto; u8 mgid_index; u8 static_rate; u8 hop_limit; __be32 tclass_flowlabel; u8 rgid[16]; u8 sched_queue; u8 vlan_index; u8 feup; u8 fvl_rx; u8 reserved4[2]; u8 dmac[6]; }; enum { /* fl */ MLX4_FL_CV = 1 << 6, MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2, MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1, MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0, }; enum { /* vlan_control */ MLX4_VLAN_CTRL_ETH_SRC_CHECK_IF_COUNTER = 1 << 7, MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED = 1 << 6, MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED = 1 << 2, MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED = 1 << 1,/* 802.1p priority tag*/ MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED = 1 << 0 }; enum { /* feup */ MLX4_FEUP_FORCE_ETH_UP = 1 << 6, /* force Eth UP */ MLX4_FSM_FORCE_ETH_SRC_MAC = 1 << 5, /* force Source MAC */ MLX4_FVL_FORCE_ETH_VLAN = 1 << 3 /* force Eth vlan */ }; enum { /* fvl_rx */ MLX4_FVL_RX_FORCE_ETH_VLAN = 1 << 0 /* enforce Eth rx vlan */ }; /* QP context layout; mlx4_qp_modify(), mlx4_qp_query() and mlx4_qp_to_ready() each take a pointer to this type. Members typed __be16/__be32/__be64 are annotated as big-endian; the reservedN members are not referenced anywhere in this header. */struct mlx4_qp_context { __be32 flags; __be32 pd; u8 mtu_msgmax; u8 rq_size_stride; u8 sq_size_stride; u8 rlkey; __be32 usr_page; __be32 local_qpn; __be32 remote_qpn; struct mlx4_qp_path pri_path; struct mlx4_qp_path alt_path; __be32 params1; u32 reserved1; __be32 next_send_psn; __be32 cqn_send; u32 reserved2[2]; __be32 last_acked_psn; __be32 ssn; __be32 params2; __be32 rnr_nextrecvpsn; __be32 xrcd; __be32 cqn_recv; __be64 db_rec_addr; __be32 qkey; __be32 srqn; __be32 msn; __be16 rq_wqe_counter; __be16 sq_wqe_counter; u32 reserved3[2]; __be32 param3; __be32 nummmcpeers_basemkey; u8 log_page_size; u8 reserved4[2]; u8 mtt_base_addr_h; __be32 mtt_base_addr_l; u32 reserved5[10]; }; /* Three 64-bit masks, a reserved word, a full struct mlx4_qp_context and 58 reserved 64-bit words. */struct mlx4_update_qp_context { __be64 qp_mask; __be64
primary_addr_path_mask; __be64 secondary_addr_path_mask; u64 reserved1; struct mlx4_qp_context qp_context; u64 reserved2[58]; }; /* Plain numbers (32, 33) rather than shifted bits; presumably bit positions within mlx4_update_qp_context.qp_mask -- TODO confirm. */enum { MLX4_UPD_QP_MASK_PM_STATE = 32, MLX4_UPD_QP_MASK_VSD = 33, }; /* Each value is written as N + 32, for N in 0..16, 18 and 19 (17 is not defined); presumably bit positions within primary_addr_path_mask and secondary_addr_path_mask -- TODO confirm. The two entries preceded by a '+' appear to be the lines added by this diff. */enum { MLX4_UPD_QP_PATH_MASK_PKEY_INDEX = 0 + 32, MLX4_UPD_QP_PATH_MASK_FSM = 1 + 32, MLX4_UPD_QP_PATH_MASK_MAC_INDEX = 2 + 32, MLX4_UPD_QP_PATH_MASK_FVL = 3 + 32, MLX4_UPD_QP_PATH_MASK_CV = 4 + 32, MLX4_UPD_QP_PATH_MASK_VLAN_INDEX = 5 + 32, MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN = 6 + 32, MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_UNTAGGED = 7 + 32, MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_1P = 8 + 32, MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_TAGGED = 9 + 32, MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_UNTAGGED = 10 + 32, MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_1P = 11 + 32, MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_TAGGED = 12 + 32, MLX4_UPD_QP_PATH_MASK_FEUP = 13 + 32, MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE = 14 + 32, MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX = 15 + 32, MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, + MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32, + MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32, }; enum { /* param3 */ MLX4_STRIP_VLAN = 1 << 30 }; /* Which firmware version adds support for NEC (NoErrorCompletion) bit */ #define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232) /* NOTE(review): MLX4_WQE_CTRL_FENCE and MLX4_WQE_CTRL_INS_VLAN are both 1 << 6; presumably they are applied to different mlx4_wqe_ctrl_seg members (fence_size vs. ins_vlan) -- confirm. MLX4_WQE_CTRL_CQ_UPDATE is a two-bit value (3 << 2). */enum { MLX4_WQE_CTRL_NEC = 1 << 29, MLX4_WQE_CTRL_FENCE = 1 << 6, MLX4_WQE_CTRL_CQ_UPDATE = 3 << 2, MLX4_WQE_CTRL_SOLICITED = 1 << 1, MLX4_WQE_CTRL_IP_CSUM = 1 << 4, MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, MLX4_WQE_CTRL_INS_VLAN = 1 << 6, MLX4_WQE_CTRL_STRONG_ORDER = 1 << 7, MLX4_WQE_CTRL_FORCE_LOOPBACK = 1 << 0, }; struct mlx4_wqe_ctrl_seg { __be32 owner_opcode; __be16 vlan_tag; u8 ins_vlan; u8 fence_size; /* * High 24 bits are SRC remote buffer; low 8 bits are flags: * [7] SO (strong ordering) * [5] TCP/UDP checksum * [4] IP checksum * [3:2] C (generate completion queue entry) * [1] SE (solicited event) * [0] FL (force loopback) */ union { __be32 srcrb_flags; __be16 srcrb_flags16[2]; }; /* * imm is immediate data for
send/RDMA write w/ immediate; * also invalidation key for send with invalidate; input * modifier for WQEs on CCQs. */ __be32 imm; }; enum { MLX4_WQE_MLX_VL15 = 1 << 17, MLX4_WQE_MLX_SLR = 1 << 16 }; struct mlx4_wqe_mlx_seg { u8 owner; u8 reserved1[2]; u8 opcode; __be16 sched_prio; u8 reserved2; u8 size; /* * [17] VL15 * [16] SLR * [15:12] static rate * [11:8] SL * [4] ICRC * [3:2] C * [0] FL (force loopback) */ __be32 flags; __be16 rlid; u16 reserved3; }; struct mlx4_wqe_datagram_seg { __be32 av[8]; __be32 dqpn; __be32 qkey; __be16 vlan; u8 mac[6]; }; /* header[0] is a zero-length trailing array (a compiler extension, not a C99 flexible array member), so it adds no storage of its own to the struct. */struct mlx4_wqe_lso_seg { __be32 mss_hdr_size; __be32 header[0]; }; /* NOTE(review): 1<<31 shifts a signed int into its sign bit, which ISO C leaves undefined for a 32-bit int; the same expression appears in MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC and MLX4_INLINE_SEG. */enum mlx4_wqe_bind_seg_flags2 { MLX4_WQE_BIND_TYPE_2 = (1<<31), MLX4_WQE_BIND_ZERO_BASED = (1<<30), }; struct mlx4_wqe_bind_seg { __be32 flags1; __be32 flags2; __be32 new_rkey; __be32 lkey; __be64 addr; __be64 length; }; enum { MLX4_WQE_FMR_PERM_LOCAL_READ = 1 << 27, MLX4_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ = 1 << 29, MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE = 1 << 30, MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC = 1 << 31 }; struct mlx4_wqe_fmr_seg { __be32 flags; __be32 mem_key; __be64 buf_list; __be64 start_addr; __be64 reg_len; __be32 offset; __be32 page_size; u32 reserved[2]; }; struct mlx4_wqe_fmr_ext_seg { u8 flags; u8 reserved; __be16 app_mask; __be16 wire_app_tag; __be16 mem_app_tag; __be32 wire_ref_tag_base; __be32 mem_ref_tag_base; }; struct mlx4_wqe_local_inval_seg { u64 reserved1; __be32 mem_key; u32 reserved2; u64 reserved3[2]; }; struct mlx4_wqe_raddr_seg { __be64 raddr; __be32 rkey; u32 reserved; }; struct mlx4_wqe_atomic_seg { __be64 swap_add; __be64 compare; }; struct mlx4_wqe_masked_atomic_seg { __be64 swap_add; __be64 compare; __be64 swap_add_mask; __be64 compare_mask; }; struct mlx4_wqe_data_seg { __be32 byte_count; __be32 lkey; __be64 addr; }; enum { MLX4_INLINE_ALIGN = 64, MLX4_INLINE_SEG = 1 << 31, }; struct mlx4_wqe_inline_seg { __be32 byte_count; }; /* QP entry points: only prototypes are given in this header, so their behavior and return conventions must be checked in the defining source file. */int mlx4_qp_modify(struct mlx4_dev *dev, struct
mlx4_mtt *mtt, enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state, struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar, int sqd_event, struct mlx4_qp *qp); int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, struct mlx4_qp_context *context); int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_qp_context *context, struct mlx4_qp *qp, enum mlx4_qp_state *qp_state); /* Returns whatever radix_tree_lookup() yields for dev->qp_table_tree, keyed by qpn masked with (dev->caps.num_qps - 1). The mask acts as a modulo only if num_qps is a power of two -- TODO confirm. The helper takes no lock itself; NOTE(review): check what synchronization callers must hold. */static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) { return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1)); } void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp); #endif /* MLX4_QP_H */ Index: stable/9/sys =================================================================== --- stable/9/sys (revision 279733) +++ stable/9/sys (revision 279734) Property changes on: stable/9/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r279584