From 2808b37b59288ad8f1897e3546c2296df3384b65 Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Wed, 2 Nov 2022 23:55:38 -0700 Subject: net/mlx5: Allow async trigger completion execution on single CPU systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a single CPU system, the kernel thread executing mlx5_cmd_flush() never releases the CPU but calls down_trylock(&cmd→sem) in a busy loop. On a single processor system, this leads to a deadlock as the kernel thread which executes mlx5_cmd_invoke() never gets scheduled. Fix this, by adding the cond_resched() call to the loop, allow the command completion kernel thread to execute. Fixes: 8e715cd613a1 ("net/mlx5: Set command entry semaphore up once got index free") Signed-off-by: Alexander Schmidt Signed-off-by: Roy Novich Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core/cmd.c') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 46ba4c2faad2..2e0d59ca62b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1770,12 +1770,17 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev) struct mlx5_cmd *cmd = &dev->cmd; int i; - for (i = 0; i < cmd->max_reg_cmds; i++) - while (down_trylock(&cmd->sem)) + for (i = 0; i < cmd->max_reg_cmds; i++) { + while (down_trylock(&cmd->sem)) { mlx5_cmd_trigger_completions(dev); + cond_resched(); + } + } - while (down_trylock(&cmd->pages_sem)) + while (down_trylock(&cmd->pages_sem)) { mlx5_cmd_trigger_completions(dev); + cond_resched(); + } /* Unlock cmdif */ up(&cmd->pages_sem); -- cgit v1.2.3 From 870c2481174b839e7159555127bc8b5a5d0699ba Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 31 May 2022 09:14:03 +0300 Subject: net/mlx5: cmdif, Print info on any firmware cmd failure to tracepoint While moving to new CMD API (quiet API), some pre-existing flows may call the new API function that in case of error, returns the error instead of printing it as previously done. For such flows we bring back the print but to tracepoint this time for sys admins to have the ability to check for errors especially for commands using the new quiet API. Tracepoint output example: devlink-1333 [001] ..... 822.746922: mlx5_cmd: ACCESS_REG(0x805) op_mod(0x0) failed, status bad resource(0x5), syndrome (0xb06e1f), err(-22) Fixes: f23519e542e5 ("net/mlx5: cmdif, Add new api for command execution") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 41 ++++++++++++++------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core/cmd.c') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2e0d59ca62b5..df3e284ca5c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -45,6 +45,8 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/tout.h" +#define CREATE_TRACE_POINTS +#include "diag/cmd_tracepoint.h" enum { CMD_IF_REV = 5, @@ -785,27 +787,14 @@ EXPORT_SYMBOL(mlx5_cmd_out_err); static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) { u16 opcode, op_mod; - u32 syndrome; - u8 status; u16 uid; - int err; - - syndrome = MLX5_GET(mbox_out, out, syndrome); - status = MLX5_GET(mbox_out, out, status); opcode = MLX5_GET(mbox_in, in, opcode); op_mod = MLX5_GET(mbox_in, in, op_mod); uid = MLX5_GET(mbox_in, in, uid); - err = cmd_status_to_err(status); - if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY) mlx5_cmd_out_err(dev, opcode, op_mod, out); - else - mlx5_core_dbg(dev, - "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", - mlx5_command_str(opcode), opcode, op_mod, uid, - cmd_status_str(status), status, syndrome, err); } int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) @@ -1892,6 +1881,16 @@ out_in: return err; } +static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + trace_mlx5_cmd(mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, + cmd_status_to_err(status)); +} + static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, u32 syndrome, int err) { @@ -1914,7 +1913,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, } /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ -static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out) +static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out) { u32 syndrome = MLX5_GET(mbox_out, out, syndrome); u8 status = MLX5_GET(mbox_out, out, status); @@ -1922,8 +1921,10 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void * if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */ err = -EIO; - if (!err && status != MLX5_CMD_STAT_OK) + if (!err && status != MLX5_CMD_STAT_OK) { err = -EREMOTEIO; + mlx5_cmd_err_trace(dev, opcode, op_mod, out); + } cmd_status_log(dev, opcode, status, syndrome, err); return err; @@ -1951,9 +1952,9 @@ int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); - err = cmd_status_err(dev, err, opcode, out); - return err; + return cmd_status_err(dev, err, opcode, op_mod, out); } EXPORT_SYMBOL(mlx5_cmd_do); @@ -1997,8 +1998,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); - err = cmd_status_err(dev, err, opcode, out); + err = cmd_status_err(dev, err, opcode, op_mod, out); return mlx5_cmd_check(dev, err, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec_polling); @@ -2034,7 +2036,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work) struct mlx5_async_ctx *ctx; ctx = work->ctx; - status = cmd_status_err(ctx->dev, status, work->opcode, work->out); + status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out); work->user_callback(status, work); if (atomic_dec_and_test(&ctx->num_inflight)) complete(&ctx->inflight_done); @@ -2049,6 +2051,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->ctx = ctx; work->user_callback = callback; work->opcode = MLX5_GET(mbox_in, in, opcode); + work->op_mod = MLX5_GET(mbox_in, in, op_mod); work->out = out; if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) return -EIO; -- cgit v1.2.3 From aaf2e65cac7f2e1ae729c2fbc849091df9699f96 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 17 Nov 2022 09:07:20 +0200 Subject: net/mlx5: Fix handling of entry refcount when command is not issued to FW In case command interface is down, or the command is not allowed, driver did not increment the entry refcount, but might have decrement as part of forced completion handling. Fix that by always increment and decrement the refcount to make it symmetric for all flows. Fixes: 50b2412b7e78 ("net/mlx5: Avoid possible free of command entry while timeout comp handler") Signed-off-by: Eran Ben Elisha Signed-off-by: Moshe Shemesh Reported-by: Jack Wang Tested-by: Jack Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core/cmd.c') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index df3e284ca5c6..74bd05e5dda2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1005,6 +1005,7 @@ static void cmd_work_handler(struct work_struct *work) cmd_ent_get(ent); set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); + cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* Skip sending command to fw if internal error */ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { ent->ret = -ENXIO; @@ -1012,7 +1013,6 @@ static void cmd_work_handler(struct work_struct *work) return; } - cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -1661,8 +1661,8 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force cmd_ent_put(ent); /* timeout work was canceled */ if (!forced || /* Real FW completion */ - pci_channel_offline(dev->pdev) || /* FW is inaccessible */ - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + mlx5_cmd_is_down(dev) || /* No real FW completion is expected */ + !opcode_allowed(cmd, ent->op)) cmd_ent_put(ent); ent->ts2 = ktime_get_ns(); -- cgit v1.2.3 From 3f5769a074c13d8f08455e40586600419e02a880 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 21 Nov 2022 19:22:04 +0800 Subject: net/mlx5: Fix uninitialized variable bug in outlen_write() If sscanf() return 0, outlen is uninitialized and used in kzalloc(), this is unexpected. We should return -EINVAL if the string is invalid. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: YueHaibing Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core/cmd.c') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 74bd05e5dda2..e7a894ba5c3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1497,8 +1497,8 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, return -EFAULT; err = sscanf(outlen_str, "%d", &outlen); - if (err < 0) - return err; + if (err != 1) + return -EINVAL; ptr = kzalloc(outlen, GFP_KERNEL); if (!ptr) -- cgit v1.2.3