diff options
author | Keith Busch <kbusch@kernel.org> | 2023-12-06 10:48:30 -0800 |
---|---|---|
committer | Keith Busch <kbusch@kernel.org> | 2024-01-08 10:09:29 -0800 |
commit | 172fb49600c2b96a4ade255fbfc3fe7098b8afd3 (patch) | |
tree | 180dd0dd50c68cdd329bcddbdce52449b88fc6e3 /drivers/nvme | |
parent | a7de1dea76cd6a3707707af4ea2f8bc3cdeaeb11 (diff) | |
download | linux-172fb49600c2b96a4ade255fbfc3fe7098b8afd3.tar.gz linux-172fb49600c2b96a4ade255fbfc3fe7098b8afd3.tar.bz2 linux-172fb49600c2b96a4ade255fbfc3fe7098b8afd3.zip |
nvme-pci: enhance timeout kernel log
Kernel configs don't necessarily have opcode decoding, and some opcodes
are not even decodable. It is still interesting for debugging SSD issues
to know what opcode is timing out, what request type it came from, and
the data size (if applicable).
Also print the command_id along side blk-mq's tag to help match commands
with protocol wire traces and firmware logs,
Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/pci.c | 23 |
1 files changed, 13 insertions, 10 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f27202680741..75e763ce09aa 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1284,6 +1284,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) struct request *abort_req; struct nvme_command cmd = { }; u32 csts = readl(dev->bar + NVME_REG_CSTS); + u8 opcode; /* If PCI error recovery process is happening, we cannot reset or * the recovery mechanism will surely fail. @@ -1310,8 +1311,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) { dev_warn(dev->ctrl.device, - "I/O %d QID %d timeout, completion polled\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) QID %d timeout, completion polled\n", + req->tag, nvme_cid(req), nvmeq->qid); return BLK_EH_DONE; } @@ -1327,8 +1328,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) fallthrough; case NVME_CTRL_DELETING: dev_warn_ratelimited(dev->ctrl.device, - "I/O %d QID %d timeout, disable controller\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) QID %d timeout, disable controller\n", + req->tag, nvme_cid(req), nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; nvme_dev_disable(dev, true); return BLK_EH_DONE; @@ -1343,10 +1344,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) * command was already aborted once before and still hasn't been * returned to the driver, or if this is the admin queue. */ + opcode = nvme_req(req)->cmd->common.opcode; if (!nvmeq->qid || iod->aborted) { dev_warn(dev->ctrl.device, - "I/O %d QID %d timeout, reset controller\n", - req->tag, nvmeq->qid); + "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n", + req->tag, nvme_cid(req), opcode, + nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; goto disable; } @@ -1362,10 +1365,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) cmd.abort.sqid = cpu_to_le16(nvmeq->qid); dev_warn(nvmeq->dev->ctrl.device, - "I/O %d (%s) QID %d timeout, aborting\n", - req->tag, - nvme_get_opcode_str(nvme_req(req)->cmd->common.opcode), - nvmeq->qid); + "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, aborting req_op:%s(%u) size:%u\n", + req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode), + nvmeq->qid, blk_op_str(req_op(req)), req_op(req), + blk_rq_bytes(req)); abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd), BLK_MQ_REQ_NOWAIT); |