You can subscribe to this list here.
2005 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(35) |
Oct
(73) |
Nov
(37) |
Dec
(60) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2006 |
Jan
(76) |
Feb
(301) |
Mar
(123) |
Apr
(29) |
May
(61) |
Jun
(45) |
Jul
(10) |
Aug
(63) |
Sep
(17) |
Oct
(28) |
Nov
(43) |
Dec
(60) |
2007 |
Jan
(71) |
Feb
(86) |
Mar
(20) |
Apr
|
May
(73) |
Jun
(67) |
Jul
(40) |
Aug
(67) |
Sep
(30) |
Oct
(38) |
Nov
(113) |
Dec
(159) |
2008 |
Jan
(208) |
Feb
(299) |
Mar
(105) |
Apr
(64) |
May
(328) |
Jun
(135) |
Jul
(221) |
Aug
(119) |
Sep
(132) |
Oct
(293) |
Nov
(180) |
Dec
(225) |
2009 |
Jan
(136) |
Feb
(161) |
Mar
(226) |
Apr
(363) |
May
(188) |
Jun
(269) |
Jul
(297) |
Aug
(141) |
Sep
(237) |
Oct
(191) |
Nov
(137) |
Dec
(211) |
2010 |
Jan
(141) |
Feb
(234) |
Mar
(254) |
Apr
(370) |
May
(215) |
Jun
(211) |
Jul
(199) |
Aug
(234) |
Sep
(294) |
Oct
(182) |
Nov
(197) |
Dec
(263) |
2011 |
Jan
(115) |
Feb
(135) |
Mar
(114) |
Apr
(177) |
May
(181) |
Jun
(116) |
Jul
(230) |
Aug
(166) |
Sep
(93) |
Oct
(119) |
Nov
(104) |
Dec
(146) |
2012 |
Jan
(138) |
Feb
(182) |
Mar
(190) |
Apr
(126) |
May
(131) |
Jun
(92) |
Jul
(103) |
Aug
(74) |
Sep
(41) |
Oct
(65) |
Nov
(90) |
Dec
(97) |
2013 |
Jan
(73) |
Feb
(80) |
Mar
(152) |
Apr
(93) |
May
(96) |
Jun
(49) |
Jul
(73) |
Aug
(72) |
Sep
(86) |
Oct
(115) |
Nov
(80) |
Dec
(61) |
2014 |
Jan
(115) |
Feb
(171) |
Mar
(191) |
Apr
(150) |
May
(51) |
Jun
(117) |
Jul
(131) |
Aug
(91) |
Sep
(92) |
Oct
(101) |
Nov
(62) |
Dec
(108) |
2015 |
Jan
(118) |
Feb
(58) |
Mar
(79) |
Apr
(99) |
May
(64) |
Jun
(95) |
Jul
(69) |
Aug
(74) |
Sep
(77) |
Oct
(94) |
Nov
(118) |
Dec
(50) |
2016 |
Jan
(47) |
Feb
(137) |
Mar
(159) |
Apr
(50) |
May
(45) |
Jun
(77) |
Jul
(63) |
Aug
(80) |
Sep
(58) |
Oct
(49) |
Nov
(28) |
Dec
(46) |
2017 |
Jan
(34) |
Feb
(36) |
Mar
(108) |
Apr
(127) |
May
(73) |
Jun
(90) |
Jul
(28) |
Aug
(39) |
Sep
(51) |
Oct
(60) |
Nov
(32) |
Dec
(57) |
2018 |
Jan
(73) |
Feb
(19) |
Mar
(32) |
Apr
(52) |
May
(42) |
Jun
(35) |
Jul
(50) |
Aug
(21) |
Sep
(19) |
Oct
(24) |
Nov
(47) |
Dec
(35) |
2019 |
Jan
(37) |
Feb
(51) |
Mar
(59) |
Apr
(44) |
May
(42) |
Jun
(36) |
Jul
(15) |
Aug
(3) |
Sep
(36) |
Oct
(44) |
Nov
(29) |
Dec
(14) |
2020 |
Jan
(16) |
Feb
(20) |
Mar
(10) |
Apr
(13) |
May
(49) |
Jun
(15) |
Jul
(37) |
Aug
(37) |
Sep
(11) |
Oct
(13) |
Nov
(49) |
Dec
(25) |
2021 |
Jan
(23) |
Feb
(12) |
Mar
(15) |
Apr
(15) |
May
(10) |
Jun
(17) |
Jul
(9) |
Aug
(39) |
Sep
(55) |
Oct
(25) |
Nov
(30) |
Dec
(17) |
2022 |
Jan
(10) |
Feb
(14) |
Mar
(1) |
Apr
(4) |
May
(34) |
Jun
(2) |
Jul
(13) |
Aug
(7) |
Sep
(2) |
Oct
(11) |
Nov
(2) |
Dec
(5) |
2023 |
Jan
(4) |
Feb
(7) |
Mar
(4) |
Apr
|
May
|
Jun
(10) |
Jul
(1) |
Aug
(6) |
Sep
(3) |
Oct
|
Nov
|
Dec
(1) |
2024 |
Jan
(8) |
Feb
(2) |
Mar
(6) |
Apr
(2) |
May
|
Jun
(1) |
Jul
(1) |
Aug
(1) |
Sep
(2) |
Oct
(1) |
Nov
|
Dec
(1) |
2025 |
Jan
|
Feb
|
Mar
(2) |
Apr
|
May
(1) |
Jun
|
Jul
|
Aug
(4) |
Sep
(59) |
Oct
|
Nov
|
Dec
|
From: Tony B. <to...@cy...> - 2025-09-29 15:38:37
|
(target mode) struct atio7_fcp_cmnd is a variable-length data structure because of add_cdb_len, but it is embedded in struct atio_from_isp and copied around like a fixed-length data structure. For big CDBs > 16 bytes, get_datalen_for_atio() called on a fixed-length copy of the atio will access invalid memory. In some cases this can be fixed by moving the atio to the end of the data structure and using a variable-length allocation. In other cases such as allocating struct qla_tgt_cmd, the fixed-length data structures are preallocated for speed, so in the case that add_cdb_len != 0, allocate a separate buffer for the CDB. Also add memcpy_atio() as a safeguard against invalid memory accesses. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes drivers/scsi/qla2xxx/qla_target.c | 83 ++++++++++++++++++++++++++++--- drivers/scsi/qla2xxx/qla_target.h | 7 ++- 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 69ccba3436ec..c2876b442a08 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -210,6 +210,10 @@ static void qlt_queue_unknown_atio(scsi_qla_host_t *vha, struct qla_tgt_sess_op *u; struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; unsigned long flags; + unsigned int add_cdb_len = 0; + + /* atio must be the last member of qla_tgt_sess_op for add_cdb_len */ + BUILD_BUG_ON(offsetof(struct qla_tgt_sess_op, atio) + sizeof(u->atio) != sizeof(*u)); if (tgt->tgt_stop) { ql_dbg(ql_dbg_async, vha, 0x502c, @@ -218,12 +222,17 @@ static void qlt_queue_unknown_atio(scsi_qla_host_t *vha, goto out_term; } - u = kzalloc(sizeof(*u), GFP_ATOMIC); + if (atio->u.raw.entry_type == ATIO_TYPE7 && + atio->u.isp24.fcp_cmnd.task_mgmt_flags == 0) + add_cdb_len = + ((unsigned int) atio->u.isp24.fcp_cmnd.add_cdb_len) * 4; + + u = kzalloc(sizeof(*u) + add_cdb_len, GFP_ATOMIC); if (u == NULL) goto out_term; u->vha = vha; - memcpy(&u->atio, atio, sizeof(*atio)); + memcpy(&u->atio, atio, sizeof(*atio) + add_cdb_len); INIT_LIST_HEAD(&u->cmd_list); spin_lock_irqsave(&vha->cmd_list_lock, flags); @@ -3829,6 +3838,13 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) qlt_decr_num_pend_cmds(cmd->vha); BUG_ON(cmd->sg_mapped); + + if (unlikely(cmd->cdb != &cmd->atio.u.isp24.fcp_cmnd.cdb[0])) { + kfree(cmd->cdb); + cmd->cdb = &cmd->atio.u.isp24.fcp_cmnd.cdb[0]; + cmd->cdb_len = 16; + } + cmd->jiffies_at_free = get_jiffies_64(); if (!sess || !sess->se_sess) { @@ -4128,7 +4144,6 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) struct qla_hw_data *ha = vha->hw; struct fc_port *sess = cmd->sess; struct atio_from_isp *atio = &cmd->atio; - unsigned char *cdb; unsigned long flags; uint32_t data_length; int ret, fcp_task_attr, data_dir, bidi = 0; @@ -4144,7 +4159,6 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) goto out_term; } - cdb = &atio->u.isp24.fcp_cmnd.cdb[0]; cmd->se_cmd.tag = le32_to_cpu(atio->u.isp24.exchange_addr); if (atio->u.isp24.fcp_cmnd.rddata && @@ -4162,7 +4176,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) atio->u.isp24.fcp_cmnd.task_attr); data_length = get_datalen_for_atio(atio); - ret = ha->tgt.tgt_ops->handle_cmd(vha, cmd, cdb, data_length, + ret = ha->tgt.tgt_ops->handle_cmd(vha, cmd, cmd->cdb, data_length, fcp_task_attr, data_dir, bidi); if (ret != 0) goto out_term; @@ -4183,6 +4197,11 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) qlt_send_term_exchange(qpair, NULL, &cmd->atio, 1); qlt_decr_num_pend_cmds(vha); + if (unlikely(cmd->cdb != &cmd->atio.u.isp24.fcp_cmnd.cdb[0])) { + kfree(cmd->cdb); + cmd->cdb = &cmd->atio.u.isp24.fcp_cmnd.cdb[0]; + cmd->cdb_len = 16; + } cmd->vha->hw->tgt.tgt_ops->rel_cmd(cmd); spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); @@ -4306,18 +4325,43 @@ static void qlt_assign_qpair(struct scsi_qla_host *vha, cmd->se_cmd.cpuid = h->cpuid; } +/* + * Safely make a fixed-length copy of a variable-length atio by truncating the + * CDB if necessary. + */ +static void memcpy_atio(struct atio_from_isp *dst, + const struct atio_from_isp *src) +{ + int len; + + memcpy(dst, src, sizeof(*dst)); + + /* + * If the CDB was truncated, prevent get_datalen_for_atio() from + * accessing invalid memory. + */ + len = src->u.isp24.fcp_cmnd.add_cdb_len; + if (unlikely(len != 0)) { + dst->u.isp24.fcp_cmnd.add_cdb_len = 0; + memcpy(&dst->u.isp24.fcp_cmnd.add_cdb[0], + &src->u.isp24.fcp_cmnd.add_cdb[len * 4], + 4); + } +} + static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha, struct fc_port *sess, struct atio_from_isp *atio) { struct qla_tgt_cmd *cmd; + int add_cdb_len; cmd = vha->hw->tgt.tgt_ops->get_cmd(sess); if (!cmd) return NULL; cmd->cmd_type = TYPE_TGT_CMD; - memcpy(&cmd->atio, atio, sizeof(*atio)); + memcpy_atio(&cmd->atio, atio); INIT_LIST_HEAD(&cmd->sess_cmd_list); cmd->state = QLA_TGT_STATE_NEW; cmd->tgt = vha->vha_tgt.qla_tgt; @@ -4337,6 +4381,29 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha, cmd->vp_idx = vha->vp_idx; cmd->edif = sess->edif.enable; + cmd->cdb = &cmd->atio.u.isp24.fcp_cmnd.cdb[0]; + cmd->cdb_len = 16; + + /* + * NOTE: memcpy_atio() set cmd->atio.u.isp24.fcp_cmnd.add_cdb_len to 0, + * so use the original value here. + */ + add_cdb_len = atio->u.isp24.fcp_cmnd.add_cdb_len; + if (unlikely(add_cdb_len != 0)) { + int cdb_len = 16 + add_cdb_len * 4; + u8 *cdb; + + cdb = kmalloc(cdb_len, GFP_ATOMIC); + if (unlikely(!cdb)) { + vha->hw->tgt.tgt_ops->free_cmd(cmd); + return NULL; + } + /* CAUTION: copy CDB from atio not cmd->atio */ + memcpy(cdb, atio->u.isp24.fcp_cmnd.cdb, cdb_len); + cmd->cdb = cdb; + cmd->cdb_len = cdb_len; + } + return cmd; } @@ -5484,13 +5551,15 @@ qlt_alloc_qfull_cmd(struct scsi_qla_host *vha, qlt_incr_num_pend_cmds(vha); INIT_LIST_HEAD(&cmd->cmd_list); - memcpy(&cmd->atio, atio, sizeof(*atio)); + memcpy_atio(&cmd->atio, atio); cmd->tgt = vha->vha_tgt.qla_tgt; cmd->vha = vha; cmd->reset_count = ha->base_qpair->chip_reset; cmd->q_full = 1; cmd->qpair = ha->base_qpair; + cmd->cdb = &cmd->atio.u.isp24.fcp_cmnd.cdb[0]; + cmd->cdb_len = 16; if (qfull) { cmd->q_full = 1; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 223c40bc9498..97aa6d9cfc27 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -830,11 +830,13 @@ struct qla_tgt { struct qla_tgt_sess_op { struct scsi_qla_host *vha; uint32_t chip_reset; - struct atio_from_isp atio; struct work_struct work; struct list_head cmd_list; bool aborted; struct rsp_que *rsp; + + struct atio_from_isp atio; + /* DO NOT ADD ANYTHING ELSE HERE - atio must be last member */ }; enum trace_flags { @@ -925,8 +927,9 @@ struct qla_tgt_cmd { uint8_t scsi_status, sense_key, asc, ascq; struct crc_context *ctx; - const uint8_t *cdb; + uint8_t *cdb; uint64_t lba; + int cdb_len; uint16_t a_guard, e_guard, a_app_tag, e_app_tag; uint32_t a_ref_tag, e_ref_tag; #define DIF_BUNDL_DMA_VALID 1 -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 15:31:55
|
This enables the initiator to abort commands that are stuck pending in the HW without waiting for a timeout. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes This patch applies to the out-of-tree SCST project, not to the Linux kernel. Apply after all the other qla2xxx patches. qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c index 07aee6e81..cb58fae20 100644 --- a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c +++ b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c @@ -81,6 +81,7 @@ static int sqa_xmit_response(struct scst_cmd *scst_cmd); static int sqa_rdy_to_xfer(struct scst_cmd *scst_cmd); static void sqa_on_free_cmd(struct scst_cmd *scst_cmd); static void sqa_task_mgmt_fn_done(struct scst_mgmt_cmd *mcmd); +static void sqa_on_abort_cmd(struct scst_cmd *scst_cmd); static int sqa_get_initiator_port_transport_id(struct scst_tgt *tgt, struct scst_session *scst_sess, uint8_t **transport_id); @@ -1255,6 +1256,7 @@ static struct scst_tgt_template sqa_scst_template = { .on_free_cmd = sqa_on_free_cmd, .task_mgmt_fn_done = sqa_task_mgmt_fn_done, + .on_abort_cmd = sqa_on_abort_cmd, .close_session = sqa_close_session, .get_initiator_port_transport_id = sqa_get_initiator_port_transport_id, @@ -1926,6 +1928,46 @@ out_unlock: TRACE_EXIT(); } +struct sqa_abort_work { + struct scst_cmd *scst_cmd; + struct work_struct abort_work; +}; + +static void sqa_on_abort_cmd_work(struct work_struct *work) +{ + struct sqa_abort_work *abort_work = + container_of(work, struct sqa_abort_work, abort_work); + struct scst_cmd *scst_cmd = abort_work->scst_cmd; + struct qla_tgt_cmd *cmd = scst_cmd_get_tgt_priv(scst_cmd); + + TRACE_ENTRY(); + + kfree(abort_work); + qlt_abort_cmd(cmd); + scst_cmd_put(scst_cmd); + + TRACE_EXIT(); +} + +static void sqa_on_abort_cmd(struct scst_cmd *scst_cmd) +{ + struct sqa_abort_work *abort_work; + + /* + * The caller holds sess->sess_list_lock, but qlt_abort_cmd() needs to + * acquire qpair->qp_lock_ptr (ha->hardware_lock); these locks are + * acquired in the reverse order elsewhere. Use a workqueue to avoid + * acquiring the locks in the wrong order here. + */ + abort_work = kmalloc(sizeof(*abort_work), GFP_ATOMIC); + if (!abort_work) + return; + scst_cmd_get(scst_cmd); + abort_work->scst_cmd = scst_cmd; + INIT_WORK(&abort_work->abort_work, sqa_on_abort_cmd_work); + schedule_work(&abort_work->abort_work); +} + /* * The following structure defines the callbacks which will be executed * from functions in the qla_target.c file back to this interface -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:53:17
|
(target mode) The driver associates two different structs with numeric handles and passes the handles to the hardware. When the hardware passes the handle back to the driver, the driver consults a table of void * to convert the handle back to the struct without checking the type of struct. This can lead to type confusion if the HBA firmware misbehaves (and some firmware versions do). So verify the type of struct is what is expected before using it. But we can also do better than that. Also verify that the exchange address of the message sent from the hardware matches the exchange address of the command being returned. This adds an extra guard against buggy HBA firmware that returns duplicate messages multiple times (which has also been seen) in case the driver has reused the handle for a different command of the same type. These problems were seen on a QLE2694L with firmware 9.08.02 when testing SLER / SRR support. The SRR caused the HBA to flood the response queue with hundreds of bogus entries. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: shorten code comment due to the removal of unsafe code from the prior v1 patch "scsi: qla2xxx: fix oops during cmd abort". drivers/scsi/qla2xxx/qla_dbg.c | 2 +- drivers/scsi/qla2xxx/qla_target.c | 109 ++++++++++++++++++++++++------ 2 files changed, 90 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 9f56bec26231..a7e3ec9bba47 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -54,7 +54,7 @@ * | Misc | 0xd303 | 0xd031-0xd0ff | * | | | 0xd101-0xd1fe | * | | | 0xd214-0xd2fe | - * | Target Mode | 0xe086 | | + * | Target Mode | 0xe089 | | * | Target Mode Management | 0xf09b | 0xf002 | * | | | 0xf046-0xf049 | * | Target Mode Task Management | 0x1000d | | diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index a59742ca51ec..d82c7022f3d7 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -4000,7 +4000,8 @@ static int qlt_prepare_srr_ctio(struct qla_qpair *qpair, /* ha->hardware_lock supposed to be held on entry */ static void *qlt_ctio_to_cmd(struct scsi_qla_host *vha, - struct rsp_que *rsp, uint32_t handle, void *ctio) + struct rsp_que *rsp, uint32_t handle, uint8_t cmd_type, + const void *ctio) { void *cmd = NULL; struct req_que *req; @@ -4023,29 +4024,97 @@ static void *qlt_ctio_to_cmd(struct scsi_qla_host *vha, h &= QLA_CMD_HANDLE_MASK; - if (h != QLA_TGT_NULL_HANDLE) { - if (unlikely(h >= req->num_outstanding_cmds)) { - ql_dbg(ql_dbg_tgt, vha, 0xe052, - "qla_target(%d): Wrong handle %x received\n", - vha->vp_idx, handle); - return NULL; - } - - cmd = req->outstanding_cmds[h]; - if (unlikely(cmd == NULL)) { - ql_dbg(ql_dbg_async, vha, 0xe053, - "qla_target(%d): Suspicious: unable to find the command with handle %x req->id %d rsp->id %d\n", - vha->vp_idx, handle, req->id, rsp->id); - return NULL; - } - req->outstanding_cmds[h] = NULL; - } else if (ctio != NULL) { + if (h == QLA_TGT_NULL_HANDLE) { /* We can't get loop ID from CTIO7 */ ql_dbg(ql_dbg_tgt, vha, 0xe054, "qla_target(%d): Wrong CTIO received: QLA24xx doesn't " "support NULL handles\n", vha->vp_idx); return NULL; } + if (unlikely(h >= req->num_outstanding_cmds)) { + ql_dbg(ql_dbg_tgt, vha, 0xe052, + "qla_target(%d): Wrong handle %x received\n", + vha->vp_idx, handle); + return NULL; + } + + /* + * We passed a numeric handle for a cmd to the hardware, and the + * hardware passed the handle back to us. Look up the associated cmd, + * and validate that the cmd_type and exchange address match what the + * caller expects. This guards against buggy HBA firmware that returns + * the same CTIO multiple times. + */ + + cmd = req->outstanding_cmds[h]; + + if (unlikely(cmd == NULL)) { + if (cmd_type == TYPE_TGT_CMD) { + __le32 ctio_exchange_addr = + ((const struct ctio7_from_24xx *)ctio)-> + exchange_address; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe053, + "qla_target(%d): tag %u: handle %x: cmd detached; ignoring CTIO (handle %x req->id %d rsp->id %d)\n", + vha->vp_idx, le32_to_cpu(ctio_exchange_addr), h, + handle, req->id, rsp->id); + } else { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe053, + "qla_target(%d): cmd detached; ignoring CTIO (handle %x req->id %d rsp->id %d)\n", + vha->vp_idx, handle, req->id, rsp->id); + } + return NULL; + } + + if (unlikely(((srb_t *)cmd)->cmd_type != cmd_type)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe087, + "qla_target(%d): handle %x: cmd detached; ignoring CTIO (cmd_type mismatch)\n", + vha->vp_idx, h); + return NULL; + } + + switch (cmd_type) { + case TYPE_TGT_CMD: { + __le32 ctio_exchange_addr = + ((const struct ctio7_from_24xx *)ctio)-> + exchange_address; + __le32 cmd_exchange_addr = + ((struct qla_tgt_cmd *)cmd)-> + atio.u.isp24.exchange_addr; + + BUILD_BUG_ON(offsetof(struct ctio7_from_24xx, + exchange_address) != + offsetof(struct ctio_crc_from_fw, + exchange_address)); + + if (unlikely(ctio_exchange_addr != cmd_exchange_addr)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe088, + "qla_target(%d): tag %u: handle %x: cmd detached; ignoring CTIO (exchange address mismatch)\n", + vha->vp_idx, le32_to_cpu(ctio_exchange_addr), h); + return NULL; + } + break; + } + + case TYPE_TGT_TMCMD: { + __le32 ctio_exchange_addr = + ((const struct abts_resp_from_24xx_fw *)ctio)-> + exchange_address; + __le32 cmd_exchange_addr = + ((struct qla_tgt_mgmt_cmd *)cmd)-> + orig_iocb.abts.exchange_address; + + if (unlikely(ctio_exchange_addr != cmd_exchange_addr)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe089, + "qla_target(%d): ABTS: handle %x: cmd detached; ignoring CTIO (exchange address mismatch)\n", + vha->vp_idx, h); + return NULL; + } + break; + } + } + + req->outstanding_cmds[h] = NULL; return cmd; } @@ -4074,7 +4143,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, ctio_flags = le16_to_cpu(ctio->flags); - cmd = qlt_ctio_to_cmd(vha, rsp, handle, ctio); + cmd = qlt_ctio_to_cmd(vha, rsp, handle, TYPE_TGT_CMD, ctio); if (unlikely(cmd == NULL)) { if ((handle & ~QLA_TGT_HANDLE_MASK) == QLA_TGT_SKIP_HANDLE && (ctio_flags & 0xe1ff) == (CTIO7_FLAGS_STATUS_MODE_1 | @@ -6836,7 +6905,7 @@ static void qlt_handle_abts_completion(struct scsi_qla_host *vha, struct qla_tgt_mgmt_cmd *mcmd; struct qla_hw_data *ha = vha->hw; - mcmd = qlt_ctio_to_cmd(vha, rsp, pkt->handle, pkt); + mcmd = qlt_ctio_to_cmd(vha, rsp, pkt->handle, TYPE_TGT_TMCMD, pkt); if (mcmd == NULL && h != QLA_TGT_SKIP_HANDLE) { ql_dbg(ql_dbg_async, vha, 0xe064, "qla_target(%d): ABTS Comp without mcmd\n", -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:52:04
|
This patch applies to the out-of-tree SCST project, not to the Linux kernel. Apply when importing the upstream patch with the same title. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: sqa_on_hw_pending_cmd_timeout() changes had to be redone due to changes in prior patch "scsi: qla2xxx: fix races with aborting commands". qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c | 157 +++++++++++++++--- 1 file changed, 134 insertions(+), 23 deletions(-) diff --git a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c index fb5956346..3fca87ff0 100644 --- a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c +++ b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c @@ -72,6 +72,8 @@ #endif #endif +#define sense_initiator_detected_error HARDWARE_ERROR, 0x48, 0x00 + static LIST_HEAD(sqa_tgt_glist); /* Function definitions for callbacks from the SCST target core. */ @@ -395,6 +397,17 @@ static struct qla_tgt_cmd *sqa_qla2xxx_get_cmd(struct fc_port *sess) return cmd; } +static int sqa_qla2xxx_get_cmd_ref(struct qla_tgt_cmd *cmd) +{ + scst_cmd_get(cmd->scst_cmd); + return 0; +} + +static void sqa_qla2xxx_put_cmd_ref(struct qla_tgt_cmd *cmd) +{ + scst_cmd_put(cmd->scst_cmd); +} + static DEFINE_MUTEX(sqa_mutex); @@ -527,8 +540,12 @@ static void sqa_qla2xxx_handle_data(struct qla_tgt_cmd *cmd) break; case DIF_ERR_NONE: default: - scst_set_cmd_error(scst_cmd, - SCST_LOAD_SENSE(scst_sense_aborted_command)); + if (cmd->srr_failed) + scst_set_cmd_error(scst_cmd, + SCST_LOAD_SENSE(sense_initiator_detected_error)); + else + scst_set_cmd_error(scst_cmd, + SCST_LOAD_SENSE(scst_sense_aborted_command)); break; } } @@ -1546,6 +1563,11 @@ static int sqa_xmit_response(struct scst_cmd *scst_cmd) } } + if (unlikely(cmd->free_sg)) { + cmd->free_sg = 0; + qlt_free_sg(cmd); + } + cmd->bufflen = scst_cmd_get_adjusted_resp_data_len(scst_cmd); cmd->sg = scst_cmd_get_sg(scst_cmd); cmd->sg_cnt = scst_cmd_get_sg_cnt(scst_cmd); @@ -1556,6 +1578,15 @@ static int sqa_xmit_response(struct scst_cmd *scst_cmd) cmd->lba = scst_cmd_get_lba(scst_cmd); cmd->trc_flags |= TRC_XMIT_STATUS; + /* + * se_cmd::data_length,t_data_sg,t_data_nents used by + * qlt_restore_orig_sg() + */ + cmd->se_cmd.data_length = cmd->bufflen; + cmd->se_cmd.t_data_sg = cmd->sg; + cmd->se_cmd.t_data_nents = cmd->sg_cnt; + cmd->se_cmd.scsi_status = cmd->scsi_status; + #if QLA_ENABLE_PI if (scst_get_tgt_dif_actions(scst_cmd->cmd_dif_actions)) { cmd->blk_sz = scst_cmd_get_block_size(scst_cmd); @@ -1600,7 +1631,7 @@ static int sqa_xmit_response(struct scst_cmd *scst_cmd) cmd->bufflen, cmd->sg_cnt, cmd->dma_data_direction, cmd->se_cmd.residual_count); - res = qlt_xmit_response(cmd, xmit_type, scst_cmd_get_status(scst_cmd)); + res = qlt_xmit_response(cmd, xmit_type, cmd->scsi_status); switch (res) { case 0: @@ -1630,16 +1661,30 @@ static int sqa_rdy_to_xfer(struct scst_cmd *scst_cmd) TRACE(TRACE_SCSI, "sqatgt(%ld/%d): tag=%lld", cmd->vha->host_no, cmd->vha->vp_idx, scst_cmd_get_tag(scst_cmd)); + if (unlikely(cmd->free_sg)) { + cmd->free_sg = 0; + qlt_free_sg(cmd); + } + cmd->bufflen = scst_cmd_get_write_fields(scst_cmd, &cmd->sg, &cmd->sg_cnt); + cmd->dma_data_direction = scst_to_tgt_dma_dir(scst_cmd_get_data_direction(scst_cmd)); + cmd->offset = 0; - cmd->sg = scst_cmd_get_sg(scst_cmd); - cmd->sg_cnt = scst_cmd_get_sg_cnt(scst_cmd); cmd->scsi_status = scst_cmd_get_status(scst_cmd); cmd->trc_flags |= TRC_XFR_RDY; + /* + * se_cmd::data_length,t_data_sg,t_data_nents used by + * qlt_restore_orig_sg() + */ + cmd->se_cmd.data_length = cmd->bufflen; + cmd->se_cmd.t_data_sg = cmd->sg; + cmd->se_cmd.t_data_nents = cmd->sg_cnt; + cmd->se_cmd.scsi_status = cmd->scsi_status; + #if QLA_ENABLE_PI if (scst_get_tgt_dif_actions(scst_cmd->cmd_dif_actions)) { cmd->blk_sz = scst_cmd_get_block_size(scst_cmd); @@ -1841,7 +1886,9 @@ static void sqa_on_hw_pending_cmd_timeout(struct scst_cmd *scst_cmd) struct qla_tgt_cmd *cmd = scst_cmd_get_tgt_priv(scst_cmd); struct scsi_qla_host *vha = cmd->vha; struct qla_qpair *qpair = cmd->qpair; + struct qla_tgt_srr *srr; unsigned long flags; + bool advance_cmd = false; TRACE_ENTRY(); @@ -1872,13 +1919,46 @@ static void sqa_on_hw_pending_cmd_timeout(struct scst_cmd *scst_cmd) break; } - /* Handle race with normal CTIO completion. */ - if (!cmd->cmd_sent_to_fw) { + srr = cmd->srr; + if (srr) { + /* Handle race with SRR processing. */ + if (srr->imm_ntfy_recvd && srr->ctio_recvd) { + TRACE_MGMT_DBG( + "sqatgt(%ld/%d): tag %lld: cmd should be scheduled for SRR processing", + vha->host_no, vha->vp_idx, + scst_cmd_get_tag(scst_cmd)); + goto out_unlock; + } + TRACE_MGMT_DBG( - "sqatgt(%ld/%d): tag %lld: cmd not sent to fw; assuming just completed", + "sqatgt(%ld/%d): tag %lld: timeout waiting for %s SRR", vha->host_no, vha->vp_idx, - scst_cmd_get_tag(scst_cmd)); - goto out_unlock; + scst_cmd_get_tag(scst_cmd), + (!srr->imm_ntfy_recvd) ? "IMM" : "CTIO"); + + if (srr->ctio_recvd) { + /* + * When the SRR CTIO was received, cmd processing was + * delayed to wait for the SRR immediate notify, which + * never arrived. Process the cmd now. + * + * Note that in this case cmd->cmd_sent_to_fw == 0 + * so we avoid checking that. + */ + advance_cmd = true; + } + + qlt_srr_abort(cmd, false); + srr = NULL; /* srr may have been freed */ + } else { + /* Handle race with normal CTIO completion. */ + if (!cmd->cmd_sent_to_fw) { + TRACE_MGMT_DBG( + "sqatgt(%ld/%d): tag %lld: cmd not sent to fw; assuming just completed", + vha->host_no, vha->vp_idx, + scst_cmd_get_tag(scst_cmd)); + goto out_unlock; + } } /* The command should be aborted elsewhere if the ISP was reset. */ @@ -1886,7 +1966,8 @@ static void sqa_on_hw_pending_cmd_timeout(struct scst_cmd *scst_cmd) goto out_unlock; /* Reset the ISP if there was a timeout after sending a term exchange. */ - if (cmd->sent_term_exchg && + if (!advance_cmd && + cmd->sent_term_exchg && time_is_before_jiffies(cmd->jiffies_at_term_exchg + SQA_MAX_HW_PENDING_TIME * HZ / 2)) { if (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) { @@ -1906,18 +1987,46 @@ static void sqa_on_hw_pending_cmd_timeout(struct scst_cmd *scst_cmd) if (!cmd->sent_term_exchg) qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1); - /* - * Restart the timer so that this function is called again - * after another timeout. This is similar to - * scst_update_hw_pending_start() except that we also set - * cmd_hw_pending to 1. - * - * IRQs are already OFF. - */ - spin_lock(&scst_cmd->sess->sess_list_lock); - scst_cmd->cmd_hw_pending = 1; - scst_cmd->hw_pending_start = jiffies; - spin_unlock(&scst_cmd->sess->sess_list_lock); + if (advance_cmd) { + switch (cmd->state) { + case QLA_TGT_STATE_NEED_DATA: + TRACE_MGMT_DBG( + "sqatgt(%ld/%d): tag %lld: force rx_data", + vha->host_no, vha->vp_idx, + scst_cmd_get_tag(scst_cmd)); + cmd->state = QLA_TGT_STATE_DATA_IN; + scst_set_cmd_error(scst_cmd, + SCST_LOAD_SENSE(scst_sense_internal_failure)); + scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET, + SCST_CONTEXT_THREAD); + break; + + case QLA_TGT_STATE_PROCESSED: + TRACE_MGMT_DBG( + "sqatgt(%ld/%d): tag %lld: force finishing cmd", + vha->host_no, vha->vp_idx, + scst_cmd_get_tag(scst_cmd)); + scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_FAILED); + scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_THREAD); + break; + + default: + break; + } + } else { + /* + * Restart the timer so that this function is called again + * after another timeout. This is similar to + * scst_update_hw_pending_start() except that we also set + * cmd_hw_pending to 1. + * + * IRQs are already OFF. + */ + spin_lock(&scst_cmd->sess->sess_list_lock); + scst_cmd->cmd_hw_pending = 1; + scst_cmd->hw_pending_start = jiffies; + spin_unlock(&scst_cmd->sess->sess_list_lock); + } out_unlock: spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); @@ -1978,6 +2087,8 @@ static struct qla_tgt_func_tmpl sqa_qla2xxx_template = { .handle_tmr = sqa_qla2xxx_handle_tmr, .find_cmd_by_tag = sqa_qla2xxx_find_cmd_by_tag, .get_cmd = sqa_qla2xxx_get_cmd, + .get_cmd_ref = sqa_qla2xxx_get_cmd_ref, + .put_cmd_ref = sqa_qla2xxx_put_cmd_ref, .rel_cmd = sqa_qla2xxx_rel_cmd, .free_cmd = sqa_qla2xxx_free_cmd, .free_mcmd = sqa_qla2xxx_free_mcmd, -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:51:02
|
(target mode) Background: loading qla2xxx with "ql2xtgt_tape_enable=1" enables Sequence Level Error Recovery (SLER), which is most commonly used for tape drives. With SLER enabled, if there is a recoverable I/O error during a SCSI command, a Sequence Retransmission Request (SRR) will be used to retry the I/O at a low-level completely within the driver without propagating the error to the upper levels of the SCSI stack. SRR support was removed in 2017 by commit 2c39b5ca2a8c ("qla2xxx: Remove SRR code"). But I need it, so add it back, new and improved. The old removed SRR code used sequence numbers to correlate the SRR CTIOs with SRR immediate notify messages. I don't see how that would work reliably with MSI-X interrupts and multiple queues. So instead use the exchange address to find the command associated with the immediate notify (qlt_srr_to_cmd). The old removed SRR code had a function qlt_check_srr_debug() to simulate a SRR, but it didn't work for me. Instead I just used fiber optic attenuators attached to the FC cable to reduce the strength of the signal and induce errors. Unfortunately this only worked for inducing SRRs on Data-Out (write) commands, so that is all I was able to test. The code to build a new scatterlist for a SRR with nonzero offset has been improved to reduce memory requirements and has been well-tested. However it does not support protection information. When a single cmd gets multiple SRRs, the old removed SRR code would restore the data buffer from the values in cmd->se_cmd before processing the new SRR. That might be needed if the offset for the new SRR was lower than the offset for the previous SRR, but I am not sure if that can happen. In my testing, when a single cmd gets multiple SRRs, the SRR offset always increases or stays the same. But in case it can decrease, I added the function qlt_restore_orig_sg(). If this is not supposed to happen then qlt_restore_orig_sg() can be removed to simplify the code. I ran into some HBA firmware bugs with QLE269x, QLE27xx, and QLE28xx firmware 9.05.xx - 9.08.xx where a SRR would cause the HBA to misbehave badly. Since SRRs are rare and therefore difficult to test, I figured it would be worth checking for the buggy firmware and disabling SLER with a warning instead of letting others run into the same problem on the rare occasion that they get a SRR. This turned out to be difficult because the firmware version isn't known in the normal NVRAM config routine, so I added a second NVRAM config routine that is called after the firmware version is known. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: - Updated qlt_has_sler_fw_bug() to include more HBAs and fw versions based on fw release notes and updated the corresponding note in the patch description. - Rebased against changes to prior patches (removal of TRC_CTIO_IGNORED). drivers/scsi/qla2xxx/qla_dbg.c | 1 + drivers/scsi/qla2xxx/qla_init.c | 1 + drivers/scsi/qla2xxx/qla_target.c | 1044 +++++++++++++++++++++++++++- drivers/scsi/qla2xxx/qla_target.h | 81 +++ drivers/scsi/qla2xxx/tcm_qla2xxx.c | 15 + 5 files changed, 1141 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index a19471d51ea5..9f56bec26231 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -58,6 +58,7 @@ * | Target Mode Management | 0xf09b | 0xf002 | * | | | 0xf046-0xf049 | * | Target Mode Task Management | 0x1000d | | + * | Target Mode SRR | 0x11038 | | * ---------------------------------------------------------------------- */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index be211ff22acb..25c1760eac45 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -4369,6 +4369,7 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) ha->max_npiv_vports = MIN_MULTI_ID_FABRIC - 1; } + qlt_config_nvram_with_fw_version(vha); qla2x00_get_resource_cnts(vha); qla_init_iocb_limit(vha); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 488c003f22c4..a59742ca51ec 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3224,6 +3224,10 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, * first. */ + /* Begin timer on the first call, not on SRR retry. */ + if (likely(cmd->jiffies_at_hw_st_entry == 0)) + cmd->jiffies_at_hw_st_entry = get_jiffies_64(); + spin_lock_irqsave(qpair->qp_lock_ptr, flags); if (unlikely(cmd->sent_term_exchg || @@ -3367,6 +3371,10 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) int pci_map_res; struct qla_qpair *qpair = cmd->qpair; + /* Begin timer on the first call, not on SRR retry. */ + if (likely(cmd->jiffies_at_hw_st_entry == 0)) + cmd->jiffies_at_hw_st_entry = get_jiffies_64(); + memset(&prm, 0, sizeof(prm)); prm.cmd = cmd; prm.tgt = tgt; @@ -3392,6 +3400,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) cmd->aborted = 1; cmd->write_data_transferred = 0; cmd->state = QLA_TGT_STATE_DATA_IN; + cmd->jiffies_at_hw_st_entry = 0; vha->hw->tgt.tgt_ops->handle_data(cmd); spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); return 0; @@ -3439,6 +3448,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) return res; out_unlock_free_unmap: + cmd->jiffies_at_hw_st_entry = 0; qlt_unmap_sg(vha, cmd); spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); @@ -3528,6 +3538,7 @@ qlt_handle_dif_error(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd, case QLA_TGT_STATE_NEED_DATA: /* handle_data will load DIF error code */ cmd->state = QLA_TGT_STATE_DATA_IN; + cmd->jiffies_at_hw_st_entry = 0; vha->hw->tgt.tgt_ops->handle_data(cmd); break; default: @@ -3607,6 +3618,62 @@ static void qlt_send_term_imm_notif(struct scsi_qla_host *vha, pr_debug("rc = %d\n", rc); } +/* + * Handle a SRR that had been previously associated with a command when the + * command has been aborted or otherwise cannot process the SRR. + * + * If reject is true, then attempt to reject the SRR. Otherwise abort the + * immediate notify exchange. + */ +void qlt_srr_abort(struct qla_tgt_cmd *cmd, bool reject) +{ + struct scsi_qla_host *vha = cmd->vha; + struct qla_tgt_srr *srr = cmd->srr; + + if (srr->imm_ntfy_recvd) { + if (reject) + srr->reject = true; + else + srr->aborted = true; + + if (srr->ctio_recvd) { + /* + * The SRR should already be scheduled for processing, + * and the SRR processing code should see that the cmd + * has been aborted and take appropriate action. In + * addition, the cmd refcount should have been + * incremented, preventing the cmd from being freed + * until SRR processing is done. + */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1102e, + "qla_target(%d): tag %lld: %s: SRR already scheduled\n", + vha->vp_idx, cmd->se_cmd.tag, __func__); + } else { + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + unsigned long flags; + + /* Shedule processing for the SRR immediate notify. */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1102f, + "qla_target(%d): tag %lld: %s: schedule SRR %s\n", + vha->vp_idx, cmd->se_cmd.tag, __func__, + reject ? "reject" : "abort"); + cmd->srr = NULL; + srr->cmd = NULL; + spin_lock_irqsave(&tgt->srr_lock, flags); + list_add_tail(&srr->srr_list_entry, &tgt->srr_list); + queue_work(qla_tgt_wq, &tgt->srr_work); + spin_unlock_irqrestore(&tgt->srr_lock, flags); + } + } else { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11030, + "qla_target(%d): tag %lld: %s: no IMM SRR; free SRR\n", + vha->vp_idx, cmd->se_cmd.tag, __func__); + cmd->srr = NULL; + kfree(srr); + } +} +EXPORT_SYMBOL(qlt_srr_abort); + /* * If hardware_lock held on entry, might drop it, then reaquire * This function sends the appropriate CTIO to ISP 2xxx or 24xx @@ -3842,9 +3909,16 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) qlt_decr_num_pend_cmds(cmd->vha); BUG_ON(cmd->sg_mapped); + if (unlikely(cmd->free_sg)) { + cmd->free_sg = 0; + qlt_free_sg(cmd); + } + if (unlikely(cmd->srr)) + qlt_srr_abort(cmd, false); if (unlikely(cmd->aborted || - (cmd->trc_flags & (TRC_CTIO_STRANGE | TRC_CTIO_ERR)))) { + (cmd->trc_flags & (TRC_CTIO_STRANGE | TRC_CTIO_ERR | + TRC_SRR_CTIO | TRC_SRR_IMM)))) { ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xe086, "qla_target(%d): tag %lld: free cmd (trc_flags %x, aborted %u, sent_term_exchg %u, rsp_sent %u)\n", cmd->vha->vp_idx, cmd->se_cmd.tag, @@ -3868,6 +3942,62 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) } EXPORT_SYMBOL(qlt_free_cmd); +/* + * Process a CTIO response for a SCSI command that failed due to SRR. + * + * qpair->qp_lock_ptr supposed to be held on entry + */ +static int qlt_prepare_srr_ctio(struct qla_qpair *qpair, + struct qla_tgt_cmd *cmd) +{ + struct scsi_qla_host *vha = cmd->vha; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + struct qla_tgt_srr *srr; + + cmd->trc_flags |= TRC_SRR_CTIO; + + srr = cmd->srr; + if (srr != NULL) { + /* qlt_prepare_srr_imm() was called first. */ + + WARN_ON(srr->ctio_recvd); + WARN_ON(!srr->imm_ntfy_recvd); + + if (vha->hw->tgt.tgt_ops->get_cmd_ref(cmd)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11037, + "qla_target(%d): tag %lld: unable to get cmd ref for SRR processing\n", + vha->vp_idx, cmd->se_cmd.tag); + qlt_srr_abort(cmd, true); + return -ESHUTDOWN; + } + + srr->ctio_recvd = true; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1100f, + "qla_target(%d): tag %lld: Scheduling SRR work\n", + vha->vp_idx, cmd->se_cmd.tag); + + /* Schedule the srr for processing in qlt_handle_srr(). */ + /* IRQ is already OFF */ + spin_lock(&tgt->srr_lock); + list_add_tail(&srr->srr_list_entry, &tgt->srr_list); + queue_work_on(cmd->se_cmd.cpuid, qla_tgt_wq, &tgt->srr_work); + spin_unlock(&tgt->srr_lock); + return 0; + } + + srr = kzalloc(sizeof(*srr), GFP_ATOMIC); + if (!srr) + return -ENOMEM; + + /* Expect qlt_prepare_srr_imm() to be called. */ + srr->ctio_recvd = true; + srr->cmd = cmd; + srr->reset_count = cmd->reset_count; + cmd->srr = srr; + return 0; +} + /* ha->hardware_lock supposed to be held on entry */ static void *qlt_ctio_to_cmd(struct scsi_qla_host *vha, struct rsp_que *rsp, uint32_t handle, void *ctio) @@ -4058,6 +4188,17 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, break; } + case CTIO_SRR_RECEIVED: + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1100e, + "qla_target(%d): tag %lld, op %x: CTIO with SRR status 0x%x received (state %d, port %8phC, LUN %lld, bufflen %d)\n", + vha->vp_idx, cmd->se_cmd.tag, op, status, + cmd->state, cmd->sess->port_name, + cmd->unpacked_lun, cmd->bufflen); + + if (qlt_prepare_srr_ctio(qpair, cmd) == 0) + return; + break; + case CTIO_DIF_ERROR: { struct ctio_crc_from_fw *crc = (struct ctio_crc_from_fw *)ctio; @@ -4110,6 +4251,15 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1); } + if (unlikely(cmd->srr != NULL)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11031, + "qla_target(%d): tag %lld, op %x: expected CTIO with SRR status; got status 0x%x: state %d, bufflen %d\n", + vha->vp_idx, cmd->se_cmd.tag, + cmd->cdb ? cmd->cdb[0] : 0, status, cmd->state, + cmd->bufflen); + qlt_srr_abort(cmd, true); + } + if (cmd->state == QLA_TGT_STATE_PROCESSED) { cmd->trc_flags |= TRC_CTIO_DONE; @@ -4122,6 +4272,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, if (status == CTIO_SUCCESS) cmd->write_data_transferred = 1; + cmd->jiffies_at_hw_st_entry = 0; ha->tgt.tgt_ops->handle_data(cmd); return; } else if (cmd->aborted) { @@ -5022,6 +5173,863 @@ static int qlt_handle_login(struct scsi_qla_host *vha, return res; } +/* + * Return true if the HBA firmware version is known to have bugs that + * prevent Sequence Level Error Recovery (SLER) / Sequence Retransmission + * Request (SRR) from working. + * + * Some bad versions are based on testing and some are based on "Marvell Fibre + * Channel Firmware Release Notes". + */ +static bool qlt_has_sler_fw_bug(struct qla_hw_data *ha) +{ + bool has_sler_fw_bug = false; + + if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) { + /* + * In the fw release notes: + * ER147301 was added to v9.05.00 causing SLER regressions + * FCD-259 was fixed in v9.08.00 + * FCD-371 was fixed in v9.08.00 + * FCD-1183 was fixed in v9.09.00 + * + * QLE2694L (ISP2071) known bad firmware (tested): + * 9.06.02 + * 9.07.00 + * 9.08.02 + * SRRs trigger hundreds of bogus entries in the response + * queue and various other problems. + * + * QLE2694L known good firmware (tested): + * 8.08.05 + * 9.09.00 + * + * Suspected bad firmware (not confirmed by testing): + * v9.05.xx + * + * unknown firmware: + * 9.00.00 - 9.04.xx + */ + if (ha->fw_major_version == 9 && + ha->fw_minor_version >= 5 && + ha->fw_minor_version <= 8) + has_sler_fw_bug = true; + } + + return has_sler_fw_bug; +} + +/* + * Return true and print a message if the HA has been reset since the SRR + * immediate notify was received; else return false. + */ +static bool qlt_srr_is_chip_reset(struct scsi_qla_host *vha, + struct qla_qpair *qpair, struct qla_tgt_srr *srr) +{ + if (!vha->flags.online || + !qpair->fw_started || + srr->reset_count != qpair->chip_reset) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1100d, + "qla_target(%d): chip reset; discarding IMM SRR\n", + vha->vp_idx); + return true; + } + return false; +} + +/* Find and return the command associated with a SRR immediate notify. */ +static struct qla_tgt_cmd *qlt_srr_to_cmd(struct scsi_qla_host *vha, + const struct imm_ntfy_from_isp *iocb) +{ + struct qla_hw_data *ha = vha->hw; + struct fc_port *sess; + struct qla_tgt_cmd *cmd; + uint32_t tag = le32_to_cpu(iocb->u.isp24.exchange_address); + uint16_t loop_id; + be_id_t s_id; + unsigned long flags; + + if (tag == ATIO_EXCHANGE_ADDRESS_UNKNOWN) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11009, + "qla_target(%d): IMM SRR with unknown exchange address; reject SRR\n", + vha->vp_idx); + return NULL; + } + + loop_id = le16_to_cpu(iocb->u.isp24.nport_handle); + + s_id.domain = iocb->u.isp24.port_id[2]; + s_id.area = iocb->u.isp24.port_id[1]; + s_id.al_pa = iocb->u.isp24.port_id[0]; + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id); + if (!sess) + sess = ha->tgt.tgt_ops->find_sess_by_loop_id(vha, loop_id); + if (!sess || sess->deleted) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1100a, + "qla_target(%d): could not find session for IMM SRR; reject SRR\n", + vha->vp_idx); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + return NULL; + } + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + + cmd = ha->tgt.tgt_ops->find_cmd_by_tag(sess, tag); + if (!cmd) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1100b, + "qla_target(%d): could not find cmd for IMM SRR; reject SRR\n", + vha->vp_idx); + } else { + u16 srr_ox_id = le16_to_cpu(iocb->u.isp24.srr_ox_id); + u16 cmd_ox_id = be16_to_cpu(cmd->atio.u.isp24.fcp_hdr.ox_id); + + if (srr_ox_id != cmd_ox_id) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1100c, + "qla_target(%d): tag %lld: IMM SRR: srr_ox_id[%04x] != cmd_ox_id[%04x]; reject SRR\n", + vha->vp_idx, cmd->se_cmd.tag, + srr_ox_id, cmd_ox_id); + cmd = NULL; + } + } + + return cmd; +} + +/* + * Handle an immediate notify SRR (Sequence Retransmission Request) message from + * the hardware. The hardware will also send a CTIO with CTIO_SRR_RECEIVED status + * for the affected command. + * + * This may be called a second time for the same immediate notify SRR if + * CTIO_SRR_RECEIVED is never received and qlt_srr_abort() is called. + * + * Process context, no locks + */ +static void qlt_handle_srr_imm(struct scsi_qla_host *vha, + struct qla_tgt_srr *srr) +{ + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + struct qla_hw_data *ha = vha->hw; + struct qla_qpair *qpair; + struct qla_tgt_cmd *cmd; + uint8_t srr_explain = NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_NO_EXPL; + + /* handle qlt_srr_abort() */ + if (srr->aborted) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11004, + "qla_target(%d): IMM SRR: terminating SRR for aborted cmd\n", + vha->vp_idx); + spin_lock_irq(&ha->hardware_lock); + if (!qlt_srr_is_chip_reset(vha, ha->base_qpair, srr)) + qlt_send_term_imm_notif(vha, &srr->imm_ntfy, 1); + spin_unlock_irq(&ha->hardware_lock); + kfree(srr); + return; + } + if (srr->reject) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11005, + "qla_target(%d): IMM SRR: rejecting SRR for unknown cmd\n", + vha->vp_idx); + goto out_reject; + } + + /* Find the command associated with the SRR. */ + cmd = qlt_srr_to_cmd(vha, &srr->imm_ntfy); + if (cmd == NULL) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11005, + "qla_target(%d): IMM SRR: rejecting SRR for unknown cmd\n", + vha->vp_idx); + srr_explain = NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_INVALID_OX_ID_RX_ID; + goto out_reject; + } + + if (ha->tgt.tgt_ops->get_cmd_ref(cmd)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11038, + "qla_target(%d): IMM SRR: unable to get cmd ref; rejecting SRR\n", + vha->vp_idx); + cmd = NULL; + goto out_reject; + } + + qpair = cmd->qpair; + + spin_lock_irq(qpair->qp_lock_ptr); + + if (cmd->reset_count != srr->reset_count) { + /* force a miscompare */ + srr->reset_count = qpair->chip_reset ^ 1; + } + if (qlt_srr_is_chip_reset(vha, qpair, srr)) { + spin_unlock_irq(qpair->qp_lock_ptr); + ha->tgt.tgt_ops->put_cmd_ref(cmd); + kfree(srr); + return; + } + + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11001, + "qla_target(%d): tag %lld, op %x: received IMM SRR\n", + vha->vp_idx, cmd->se_cmd.tag, cmd->cdb ? cmd->cdb[0] : 0); + + cmd->trc_flags |= TRC_SRR_IMM; + + if (cmd->srr != NULL) { + if (cmd->srr->imm_ntfy_recvd) { + /* + * Received another immediate notify SRR message for + * this command before the previous one could be processed + * (not expected to happen). + */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11006, + "qla_target(%d): tag %lld: received multiple IMM SRR; reject SRR\n", + vha->vp_idx, cmd->se_cmd.tag); + spin_unlock_irq(qpair->qp_lock_ptr); + ha->tgt.tgt_ops->put_cmd_ref(cmd); + goto out_reject; + } + + /* qlt_prepare_srr_ctio() was called first. */ + WARN_ON(!cmd->srr->ctio_recvd); + + /* + * The immediate notify and CTIO handlers both allocated + * separate srr structs; combine them. + */ + memcpy(&cmd->srr->imm_ntfy, &srr->imm_ntfy, + sizeof(srr->imm_ntfy)); + kfree(srr); + srr = cmd->srr; + srr->imm_ntfy_recvd = true; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11002, + "qla_target(%d): tag %lld: schedule SRR work\n", + vha->vp_idx, cmd->se_cmd.tag); + + /* Schedule the srr for processing in qlt_handle_srr(). */ + spin_lock(&tgt->srr_lock); + list_add_tail(&srr->srr_list_entry, &tgt->srr_list); + /* + * Already running the work function; no need to schedule + * tgt->srr_work. + */ + spin_unlock(&tgt->srr_lock); + spin_unlock_irq(qpair->qp_lock_ptr); + /* return with cmd refcount incremented */ + return; + } + + /* The CTIO SRR for this command has not yet been received. */ + + if (cmd->sent_term_exchg) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11007, + "qla_target(%d): tag %lld: IMM SRR: cmd already aborted\n", + vha->vp_idx, cmd->se_cmd.tag); + spin_unlock_irq(qpair->qp_lock_ptr); + spin_lock_irq(&ha->hardware_lock); + if (!qlt_srr_is_chip_reset(vha, ha->base_qpair, srr)) + qlt_send_term_imm_notif(vha, &srr->imm_ntfy, 1); + spin_unlock_irq(&ha->hardware_lock); + kfree(srr); + ha->tgt.tgt_ops->put_cmd_ref(cmd); + return; + } + + /* If not expecting a CTIO, then reject IMM SRR. */ + if (!cmd->cmd_sent_to_fw) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11008, + "qla_target(%d): tag %lld: IMM SRR but !cmd_sent_to_fw (state %d); reject SRR\n", + vha->vp_idx, cmd->se_cmd.tag, cmd->state); + spin_unlock_irq(qpair->qp_lock_ptr); + ha->tgt.tgt_ops->put_cmd_ref(cmd); + goto out_reject; + } + + /* Expect qlt_prepare_srr_ctio() to be called. */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11003, + "qla_target(%d): tag %lld: wait for CTIO SRR (state %d)\n", + vha->vp_idx, cmd->se_cmd.tag, cmd->state); + srr->cmd = cmd; + cmd->srr = srr; + + spin_unlock_irq(qpair->qp_lock_ptr); + + ha->tgt.tgt_ops->put_cmd_ref(cmd); + return; + +out_reject: + qpair = vha->hw->base_qpair; + spin_lock_irq(qpair->qp_lock_ptr); + if (!qlt_srr_is_chip_reset(vha, qpair, srr)) + qlt_send_notify_ack(qpair, &srr->imm_ntfy, 0, 0, 0, + NOTIFY_ACK_SRR_FLAGS_REJECT, + NOTIFY_ACK_SRR_REJECT_REASON_UNABLE_TO_PERFORM, + srr_explain); + spin_unlock_irq(qpair->qp_lock_ptr); + kfree(srr); +} + +/* + * Handle an immediate notify SRR (Sequence Retransmission Request) message from + * the hardware. The hardware will also send a CTIO with CTIO_SRR_RECEIVED status + * for the affected command. + * + * ha->hardware_lock supposed to be held on entry + */ +static void qlt_prepare_srr_imm(struct scsi_qla_host *vha, + struct imm_ntfy_from_isp *iocb) +{ + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + struct qla_tgt_srr *srr; + + ql_log(ql_log_warn, vha, 0x11000, "qla_target(%d): received IMM SRR\n", + vha->vp_idx); + + /* + * Need cmd->qpair->qp_lock_ptr, but have ha->hardware_lock. Defer + * processing to a workqueue so that the right lock can be acquired + * safely. + */ + + srr = kzalloc(sizeof(*srr), GFP_ATOMIC); + if (!srr) + goto out_reject; + + memcpy(&srr->imm_ntfy, iocb, sizeof(srr->imm_ntfy)); + srr->imm_ntfy_recvd = true; + srr->reset_count = vha->hw->base_qpair->chip_reset; + spin_lock(&tgt->srr_lock); + list_add_tail(&srr->srr_list_entry, &tgt->srr_list); + queue_work(qla_tgt_wq, &tgt->srr_work); + spin_unlock(&tgt->srr_lock); + /* resume processing in qlt_handle_srr_imm() */ + return; + +out_reject: + qlt_send_notify_ack(vha->hw->base_qpair, iocb, 0, 0, 0, + NOTIFY_ACK_SRR_FLAGS_REJECT, + NOTIFY_ACK_SRR_REJECT_REASON_UNABLE_TO_PERFORM, + NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_NO_EXPL); +} + +/* + * If possible, undo the effect of qlt_set_data_offset() and restore the cmd + * data buffer back to its full size. + */ +static int qlt_restore_orig_sg(struct qla_tgt_cmd *cmd) +{ + struct scsi_qla_host *vha = cmd->vha; + struct se_cmd *se_cmd = &cmd->se_cmd; + + WARN_ON(cmd->sg_mapped); + + if (cmd->offset == 0) { + /* qlt_set_data_offset() has not been called. */ + return 0; + } + + if (se_cmd->t_data_sg == NULL || + se_cmd->t_data_nents == 0 || + se_cmd->data_length == 0) { + /* The original scatterlist is not available. */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1102c, + "qla_target(%d): tag %lld: cannot restore original cmd buffer; keep modified buffer at offset %d\n", + vha->vp_idx, cmd->se_cmd.tag, cmd->offset); + return -ENOENT; + } + + /* Restore the original scatterlist. */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1102d, + "qla_target(%d): tag %lld: restore original cmd buffer: offset %d -> 0\n", + vha->vp_idx, cmd->se_cmd.tag, cmd->offset); + if (cmd->free_sg) { + cmd->free_sg = 0; + qlt_free_sg(cmd); + } + cmd->offset = 0; + cmd->sg = se_cmd->t_data_sg; + cmd->sg_cnt = se_cmd->t_data_nents; + cmd->bufflen = se_cmd->data_length; + return 0; +} + +/* + * Adjust the data buffer of the given command to skip over offset bytes from + * the beginning while also reducing the length by offset bytes. + * + * This may be called multiple times for a single command if there are multiple + * SRRs, which each call reducing the buffer size further relative to the + * previous call. Note that the buffer may be reset back to its original size + * by calling qlt_restore_orig_sg(). + */ +static int qlt_set_data_offset(struct qla_tgt_cmd *cmd, uint32_t offset) +{ + struct scsi_qla_host *vha = cmd->vha; + struct scatterlist *sg_srr_start = NULL, *sg; + uint32_t first_offset = offset; + int sg_srr_cnt, i; + int bufflen = 0; + + WARN_ON(cmd->sg_mapped); + + ql_dbg(ql_dbg_tgt, vha, 0x11020, + "qla_target(%d): tag %lld: %s: sg %p sg_cnt %d dir %d cmd->offset %d cmd->bufflen %d add offset %u\n", + vha->vp_idx, cmd->se_cmd.tag, __func__, cmd->sg, + cmd->sg_cnt, cmd->dma_data_direction, cmd->offset, cmd->bufflen, + offset); + + if (cmd->se_cmd.prot_op != TARGET_PROT_NORMAL) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11021, + "qla_target(%d): tag %lld: %s: SRR with protection information at nonzero offset not implemented\n", + vha->vp_idx, cmd->se_cmd.tag, __func__); + return -EINVAL; + } + + if (!cmd->sg || !cmd->sg_cnt) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11022, + "qla_target(%d): tag %lld: %s: Missing cmd->sg or zero cmd->sg_cnt\n", + vha->vp_idx, cmd->se_cmd.tag, __func__); + return -EINVAL; + } + + /* + * Walk the current cmd->sg list until we locate the new sg_srr_start + */ + for_each_sg(cmd->sg, sg, cmd->sg_cnt, i) { + ql_dbg(ql_dbg_tgt, vha, 0x11023, + "sg[%d]: %p page: %p, length: %d, offset: %d\n", + i, sg, sg_page(sg), sg->length, sg->offset); + + if (first_offset < sg->length) { + sg_srr_start = sg; + break; + } + first_offset -= sg->length; + } + + if (!sg_srr_start) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11024, + "qla_target(%d): tag %lld: Unable to locate sg_srr_start for offset: %u\n", + vha->vp_idx, cmd->se_cmd.tag, offset); + return -EINVAL; + } + + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11025, + "qla_target(%d): tag %lld: prepare SRR sgl at sg index %d of %d byte offset %u of %u\n", + vha->vp_idx, cmd->se_cmd.tag, i, cmd->sg_cnt, + first_offset, sg_srr_start->length); + + sg_srr_cnt = cmd->sg_cnt - i; + + if (first_offset == 0 && !cmd->free_sg) { + /* + * The offset points to the beginning of a scatterlist element. + * In this case there is no need to modify the first scatterlist + * element, so we can just point directly inside the original + * unmodified scatterlist. + */ + ql_dbg(ql_dbg_tgt, vha, 0x11026, "point directly to old sgl\n"); + cmd->sg = sg_srr_start; + } else { + /* + * Allocate at most 2 new scatterlist elements to reduce memory + * requirements. + */ + int n_alloc_sg = min(sg_srr_cnt, 2); + struct scatterlist *sg_srr = + kmalloc_array(n_alloc_sg, sizeof(*sg_srr), GFP_ATOMIC); + if (!sg_srr) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11027, + "qla_target(%d): tag %lld: Unable to allocate SRR scatterlist\n", + vha->vp_idx, cmd->se_cmd.tag); + return -ENOMEM; + } + sg_init_table(sg_srr, n_alloc_sg); + + /* Init the first sg element to skip over the unneeded data. */ + sg_set_page(&sg_srr[0], sg_page(sg_srr_start), + sg_srr_start->length - first_offset, + sg_srr_start->offset + first_offset); + if (sg_srr_cnt == 1) { + ql_dbg(ql_dbg_tgt, vha, 0x11028, + "single-element array\n"); + } else if (sg_srr_cnt == 2) { + /* Only two elements; copy the last element. */ + ql_dbg(ql_dbg_tgt, vha, 0x11029, + "complete two-element array\n"); + sg = sg_next(sg_srr_start); + sg_set_page(&sg_srr[1], sg_page(sg), sg->length, + sg->offset); + } else { + /* + * Three or more elements; chain our newly-allocated + * 2-entry array to the rest of the original + * scatterlist at the splice point. + */ + ql_dbg(ql_dbg_tgt, vha, 0x1102a, + "chain to original scatterlist\n"); + sg = sg_next(sg_srr_start); + sg_chain(sg_srr, 2, sg); + } + + /* + * If the previous scatterlist was allocated here on a previous + * call, then it should be safe to free now. + */ + if (cmd->free_sg) + qlt_free_sg(cmd); + cmd->sg = sg_srr; + cmd->free_sg = 1; + } + + /* Note that sg_cnt doesn't include any extra chain elements. */ + cmd->sg_cnt = sg_srr_cnt; + cmd->offset += offset; + cmd->bufflen -= offset; + + /* Check the scatterlist length for consistency. */ + for_each_sg(cmd->sg, sg, cmd->sg_cnt, i) { + bufflen += sg->length; + } + if (bufflen != cmd->bufflen) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1102b, + "qla_target(%d): tag %lld: %s: bad sgl length: expected %d got %d\n", + vha->vp_idx, cmd->se_cmd.tag, __func__, cmd->bufflen, bufflen); + return -EINVAL; + } + + return 0; +} + +/* + * Given the "SRR relative offset" (offset of data to retry), determine what + * needs to be retransmitted (data and/or status) and return the mask in + * xmit_type. If retrying data, adjust the command buffer to point to only the + * data that need to be retried, skipping over the data that don't need to be + * retried. + * + * Returns 0 for success or a negative error number. + */ +static inline int qlt_srr_adjust_data(struct qla_tgt_cmd *cmd, + uint32_t srr_rel_offs, int *xmit_type) +{ + struct scsi_qla_host *vha = cmd->vha; + int res = 0, rel_offs; + + if (srr_rel_offs < cmd->offset || + srr_rel_offs > cmd->offset + cmd->bufflen) { + *xmit_type = 0; + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1101e, + "qla_target(%d): tag %lld: srr_rel_offs %u outside accepted range %u - %u\n", + vha->vp_idx, cmd->se_cmd.tag, srr_rel_offs, + cmd->offset, cmd->offset + cmd->bufflen); + return -EINVAL; + } + + /* + * srr_rel_offs is the offset of the data we need from the beginning of + * the *original* buffer. + * + * cmd->offset is the offset of the current cmd scatterlist from the + * beginning of the *original* buffer, which might be nonzero if there + * was a previous SRR and the buffer could not be reset back to its + * original size. + * + * rel_offs is the offset of the data we need from the beginning of the + * current cmd scatterlist. + */ + rel_offs = srr_rel_offs - cmd->offset; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1101f, + "qla_target(%d): tag %lld: current buffer [%u - %u); srr_rel_offs=%d, rel_offs=%d\n", + vha->vp_idx, cmd->se_cmd.tag, cmd->offset, + cmd->offset + cmd->bufflen, srr_rel_offs, rel_offs); + + *xmit_type = QLA_TGT_XMIT_ALL; + + if (rel_offs == cmd->bufflen) + *xmit_type = QLA_TGT_XMIT_STATUS; + else if (rel_offs > 0) + res = qlt_set_data_offset(cmd, rel_offs); + + return res; +} + +/* + * Process a SRR (Sequence Retransmission Request) for a SCSI command once both + * the immediate notify SRR and CTIO SRR have been received from the hw. + * + * Process context, no locks + */ +static void qlt_handle_srr(struct scsi_qla_host *vha, struct qla_tgt_srr *srr) +{ + struct qla_tgt_cmd *cmd = srr->cmd; + struct se_cmd *se_cmd = &cmd->se_cmd; + struct qla_qpair *qpair = cmd->qpair; + struct qla_hw_data *ha = vha->hw; + uint8_t op = cmd->cdb ? cmd->cdb[0] : 0; + uint32_t srr_rel_offs = le32_to_cpu(srr->imm_ntfy.u.isp24.srr_rel_offs); + uint16_t srr_ui = le16_to_cpu(srr->imm_ntfy.u.isp24.srr_ui); + int xmit_type = 0; + bool xmit_response = false; + bool rdy_to_xfer = false; + bool did_timeout; + bool send_term_exch = false; + + spin_lock_irq(qpair->qp_lock_ptr); + + WARN_ON(cmd->cmd_sent_to_fw); + + cmd->srr = NULL; + + if (qlt_srr_is_chip_reset(vha, qpair, srr)) + goto out_advance_cmd; + + if (cmd->sent_term_exchg || cmd->sess->deleted || srr->aborted) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11010, + "qla_target(%d): tag %lld: IMM SRR: cmd already aborted\n", + vha->vp_idx, cmd->se_cmd.tag); + + spin_unlock_irq(qpair->qp_lock_ptr); + + spin_lock_irq(&ha->hardware_lock); + if (!qlt_srr_is_chip_reset(vha, ha->base_qpair, srr)) + qlt_send_term_imm_notif(vha, &srr->imm_ntfy, 1); + spin_unlock_irq(&ha->hardware_lock); + + send_term_exch = true; + + spin_lock_irq(qpair->qp_lock_ptr); + goto out_advance_cmd; + } + + if (srr->reject) + goto out_reject; + + /* + * If we receive multiple SRRs for the same command, place a time limit + * on how long we are willing to retry. This timeout should be less + * than SQA_MAX_HW_PENDING_TIME in scst_qla2xxx.c. + */ + did_timeout = time_is_before_jiffies64((cmd->jiffies_at_hw_st_entry ? : + cmd->jiffies_at_alloc) + 30 * HZ); + + qlt_restore_orig_sg(cmd); + + switch (srr_ui) { + case SRR_IU_STATUS: + if (cmd->state != QLA_TGT_STATE_PROCESSED) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11011, + "qla_target(%d): tag %lld, op %x: reject SRR_IU_STATUS due to unexpected state %d\n", + vha->vp_idx, se_cmd->tag, op, + cmd->state); + goto out_reject; + } + + if (did_timeout) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11033, + "qla_target(%d): tag %lld, op %x: reject SRR_IU_STATUS due to timeout\n", + vha->vp_idx, se_cmd->tag, op); + goto out_reject; + } + + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11012, + "qla_target(%d): tag %lld, op %x: accept SRR_IU_STATUS and retransmit scsi_status=%x\n", + vha->vp_idx, se_cmd->tag, op, + se_cmd->scsi_status); + xmit_type = QLA_TGT_XMIT_STATUS; + xmit_response = true; + cmd->trc_flags |= TRC_SRR_RSP; + break; + + case SRR_IU_DATA_IN: + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11013, + "qla_target(%d): tag %lld, op %x: process SRR_IU_DATA_IN: bufflen=%d, sg_cnt=%d, offset=%d, srr_offset=%d, scsi_status=%x\n", + vha->vp_idx, se_cmd->tag, op, cmd->bufflen, + cmd->sg_cnt, cmd->offset, srr_rel_offs, + se_cmd->scsi_status); + + if (cmd->state != QLA_TGT_STATE_PROCESSED) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11014, + "qla_target(%d): tag %lld: reject SRR_IU_DATA_IN due to unexpected state %d\n", + vha->vp_idx, se_cmd->tag, cmd->state); + goto out_reject; + } + + /* + * QLA_TGT_STATE_PROCESSED does not necessarily imply data-in + */ + if (!qlt_has_data(cmd)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11015, + "qla_target(%d): tag %lld: reject SRR_IU_DATA_IN because cmd has no data to send\n", + vha->vp_idx, se_cmd->tag); + goto out_reject; + } + + if (!cmd->sg || !cmd->sg_cnt) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11016, + "qla_target(%d): tag %lld: reject SRR_IU_DATA_IN because buffer is missing\n", + vha->vp_idx, se_cmd->tag); + goto out_reject; + } + + if (did_timeout) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11034, + "qla_target(%d): tag %lld, op %x: reject SRR_IU_DATA_IN due to timeout\n", + vha->vp_idx, se_cmd->tag, op); + goto out_reject; + } + + if (qlt_srr_adjust_data(cmd, srr_rel_offs, &xmit_type) != 0) + goto out_reject; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11017, + "qla_target(%d): tag %lld: accept SRR_IU_DATA_IN and retransmit data: bufflen=%d, offset=%d\n", + vha->vp_idx, se_cmd->tag, cmd->bufflen, + cmd->offset); + xmit_response = true; + cmd->trc_flags |= TRC_SRR_RSP; + break; + + case SRR_IU_DATA_OUT: + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11018, + "qla_target(%d): tag %lld, op %x: process SRR_IU_DATA_OUT: bufflen=%d, sg_cnt=%d, offset=%d, srr_offset=%d\n", + vha->vp_idx, se_cmd->tag, op, cmd->bufflen, + cmd->sg_cnt, cmd->offset, srr_rel_offs); + + if (cmd->state != QLA_TGT_STATE_NEED_DATA) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11019, + "qla_target(%d): tag %lld: reject SRR_IU_DATA_OUT due to unexpected state %d\n", + vha->vp_idx, se_cmd->tag, cmd->state); + goto out_reject; + } + + /* + * QLA_TGT_STATE_NEED_DATA implies there should be data-out + */ + if (!qlt_has_data(cmd) || !cmd->sg || !cmd->sg_cnt) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1101a, + "qla_target(%d): tag %lld: reject SRR_IU_DATA_OUT because buffer is missing\n", + vha->vp_idx, se_cmd->tag); + goto out_reject; + } + + if (did_timeout) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11035, + "qla_target(%d): tag %lld, op %x: reject SRR_IU_DATA_OUT due to timeout\n", + vha->vp_idx, se_cmd->tag, op); + goto out_reject; + } + + if (qlt_srr_adjust_data(cmd, srr_rel_offs, &xmit_type) != 0) + goto out_reject; + + if (!(xmit_type & QLA_TGT_XMIT_DATA)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1101b, + "qla_target(%d): tag %lld: reject SRR_IU_DATA_OUT: bad offset\n", + vha->vp_idx, se_cmd->tag); + goto out_reject; + } + + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1101c, + "qla_target(%d): tag %lld: accept SRR_IU_DATA_OUT and receive data again: bufflen=%d, offset=%d\n", + vha->vp_idx, se_cmd->tag, cmd->bufflen, + cmd->offset); + cmd->trc_flags |= TRC_SRR_XRDY; + rdy_to_xfer = true; + break; + + default: + ql_dbg(ql_dbg_tgt_mgt, vha, 0x1101d, + "qla_target(%d): tag %lld, op %x: reject unknown srr_ui value 0x%x: state=%d, bufflen=%d, offset=%d, srr_offset=%d\n", + vha->vp_idx, se_cmd->tag, op, srr_ui, cmd->state, + cmd->bufflen, cmd->offset, srr_rel_offs); + goto out_reject; + } + + qlt_send_notify_ack(qpair, &srr->imm_ntfy, 0, 0, 0, + NOTIFY_ACK_SRR_FLAGS_ACCEPT, 0, 0); + + spin_unlock_irq(qpair->qp_lock_ptr); + + if (xmit_response) { + /* For status and data-in, retransmit the response. */ + if (qlt_xmit_response(cmd, xmit_type, se_cmd->scsi_status)) { + send_term_exch = true; + spin_lock_irq(qpair->qp_lock_ptr); + goto out_advance_cmd; + } + } else if (rdy_to_xfer) { + /* For data-out, receive data again. */ + if (qlt_rdy_to_xfer(cmd)) { + send_term_exch = true; + spin_lock_irq(qpair->qp_lock_ptr); + goto out_advance_cmd; + } + } + + return; + +out_reject: + qlt_send_notify_ack(qpair, &srr->imm_ntfy, 0, 0, 0, + NOTIFY_ACK_SRR_FLAGS_REJECT, + NOTIFY_ACK_SRR_REJECT_REASON_UNABLE_TO_PERFORM, + NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_NO_EXPL); + +out_advance_cmd: + if (!cmd->sent_term_exchg && + (send_term_exch || cmd->state != QLA_TGT_STATE_NEED_DATA) && + !qlt_srr_is_chip_reset(vha, qpair, srr)) { + cmd->trc_flags |= TRC_SRR_TERM; + qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1); + } + if (cmd->state == QLA_TGT_STATE_NEED_DATA) { + /* + * The initiator should abort the command, but if not, try to + * return an error. + */ + cmd->srr_failed = 1; + cmd->write_data_transferred = 0; + cmd->state = QLA_TGT_STATE_DATA_IN; + cmd->jiffies_at_hw_st_entry = 0; + vha->hw->tgt.tgt_ops->handle_data(cmd); + } else { + vha->hw->tgt.tgt_ops->free_cmd(cmd); + } + spin_unlock_irq(qpair->qp_lock_ptr); +} + +/* Workqueue function for processing SRR work in process context. */ +static void qlt_handle_srr_work(struct work_struct *work) +{ + struct qla_tgt *tgt = container_of(work, struct qla_tgt, srr_work); + struct scsi_qla_host *vha = tgt->vha; + + ql_dbg(ql_dbg_tgt_mgt, vha, 0x11032, + "qla_target(%d): Entering SRR work\n", vha->vp_idx); + + for (;;) { + struct qla_tgt_srr *srr; + + spin_lock_irq(&tgt->srr_lock); + srr = list_first_entry_or_null(&tgt->srr_list, typeof(*srr), + srr_list_entry); + if (!srr) { + spin_unlock_irq(&tgt->srr_lock); + break; + } + list_del(&srr->srr_list_entry); + spin_unlock_irq(&tgt->srr_lock); + + if (!srr->cmd) { + qlt_handle_srr_imm(vha, srr); + } else { + qlt_handle_srr(vha, srr); + vha->hw->tgt.tgt_ops->put_cmd_ref(srr->cmd); + kfree(srr); + } + } +} + /* * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire */ @@ -5449,6 +6457,11 @@ static void qlt_handle_imm_notify(struct scsi_qla_host *vha, send_notify_ack = 0; break; + case IMM_NTFY_SRR: + qlt_prepare_srr_imm(vha, iocb); + send_notify_ack = 0; + break; + default: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf06d, "qla_target(%d): Received unknown immediate " @@ -6429,6 +7442,9 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) spin_lock_init(&tgt->sess_work_lock); INIT_WORK(&tgt->sess_work, qlt_sess_work_fn); INIT_LIST_HEAD(&tgt->sess_works_list); + spin_lock_init(&tgt->srr_lock); + INIT_LIST_HEAD(&tgt->srr_list); + INIT_WORK(&tgt->srr_work, qlt_handle_srr_work); atomic_set(&tgt->tgt_global_resets_count, 0); base_vha->vha_tgt.qla_tgt = tgt; @@ -7077,6 +8093,32 @@ qlt_81xx_config_nvram_stage2(struct scsi_qla_host *vha, } } +/* Update any settings that depend on ha->fw_*_version. */ +void +qlt_config_nvram_with_fw_version(struct scsi_qla_host *vha) +{ + struct qla_hw_data *ha = vha->hw; + + if (!QLA_TGT_MODE_ENABLED()) + return; + + if (ql2xtgt_tape_enable && qlt_has_sler_fw_bug(ha)) { + ql_log(ql_log_warn, vha, 0x11036, + "WARNING: ignoring ql2xtgt_tape_enable due to buggy HBA firmware; please upgrade FW\n"); + + /* Disable FC Tape support */ + if (ha->isp_ops->nvram_config == qla81xx_nvram_config) { + struct init_cb_81xx *icb = + (struct init_cb_81xx *)ha->init_cb; + icb->firmware_options_2 &= cpu_to_le32(~BIT_12); + } else { + struct init_cb_24xx *icb = + (struct init_cb_24xx *)ha->init_cb; + icb->firmware_options_2 &= cpu_to_le32(~BIT_12); + } + } +} + void qlt_modify_vp_config(struct scsi_qla_host *vha, struct vp_config_entry_24xx *vpmod) diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index ab2285c40573..61072fb41b29 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -184,6 +184,7 @@ struct nack_to_isp { #define NOTIFY_ACK_SRR_REJECT_REASON_UNABLE_TO_PERFORM 0x9 #define NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_NO_EXPL 0 +#define NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_INVALID_OX_ID_RX_ID 0x17 #define NOTIFY_ACK_SRR_FLAGS_REJECT_EXPL_UNABLE_TO_SUPPLY_DATA 0x2a #define NOTIFY_ACK_SUCCESS 0x01 @@ -686,6 +687,8 @@ struct qla_tgt_func_tmpl { int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, u64, uint16_t, uint32_t); struct qla_tgt_cmd *(*get_cmd)(struct fc_port *); + int (*get_cmd_ref)(struct qla_tgt_cmd *cmd); + void (*put_cmd_ref)(struct qla_tgt_cmd *cmd); void (*rel_cmd)(struct qla_tgt_cmd *); void (*free_cmd)(struct qla_tgt_cmd *); void (*free_mcmd)(struct qla_tgt_mgmt_cmd *); @@ -823,7 +826,13 @@ struct qla_tgt { int notify_ack_expected; int abts_resp_expected; int modify_lun_expected; + + spinlock_t srr_lock; + struct list_head srr_list; + struct work_struct srr_work; + atomic_t tgt_global_resets_count; + struct list_head tgt_list_entry; }; @@ -861,6 +870,7 @@ enum trace_flags { TRC_DATA_IN = BIT_18, TRC_ABORT = BIT_19, TRC_DIF_ERR = BIT_20, + TRC_SRR_IMM = BIT_21, }; struct qla_tgt_cmd { @@ -881,6 +891,10 @@ struct qla_tgt_cmd { unsigned int conf_compl_supported:1; unsigned int sg_mapped:1; + + /* Call qlt_free_sg() if set. */ + unsigned int free_sg:1; + unsigned int write_data_transferred:1; /* Set if the SCSI status was sent successfully. */ @@ -892,6 +906,9 @@ struct qla_tgt_cmd { unsigned int cmd_in_wq:1; unsigned int edif:1; + /* Set if a SRR was rejected. */ + unsigned int srr_failed:1; + /* Set if the exchange has been terminated. */ unsigned int sent_term_exchg:1; @@ -901,6 +918,7 @@ struct qla_tgt_cmd { */ unsigned int aborted:1; + struct qla_tgt_srr *srr; struct scatterlist *sg; /* cmd data buffer SG vector */ int sg_cnt; /* SG segments count */ int bufflen; /* cmd buffer length */ @@ -940,6 +958,14 @@ struct qla_tgt_cmd { uint16_t prot_flags; unsigned long jiffies_at_term_exchg; + + /* + * jiffies64 when qlt_rdy_to_xfer() or qlt_xmit_response() first + * called, or 0 when not in those states. Used to limit the number of + * SRR retries. + */ + uint64_t jiffies_at_hw_st_entry; + uint64_t jiffies_at_alloc; uint64_t jiffies_at_free; @@ -1002,6 +1028,45 @@ struct qla_tgt_prm { uint16_t tot_dsds; }; +/* + * SRR (Sequence Retransmission Request) - resend or re-receive some or all + * data or status to recover from a transient I/O error. + */ +struct qla_tgt_srr { + /* + * Copy of immediate notify SRR message received from hw; valid only if + * imm_ntfy_recvd is true. + */ + struct imm_ntfy_from_isp imm_ntfy; + + struct list_head srr_list_entry; + + /* The command affected by this SRR, or NULL if not yet determined. */ + struct qla_tgt_cmd *cmd; + + /* Used to detect if the HBA has been reset since receiving the SRR. */ + uint32_t reset_count; + + /* + * The hardware sends two messages for each SRR - an immediate notify + * and a CTIO with CTIO_SRR_RECEIVED status. These keep track of which + * messages have been received. The SRR can be processed once both of + * these are true. + */ + bool imm_ntfy_recvd; + bool ctio_recvd; + + /* + * This is set to true if the affected command was aborted (cmd may be + * set to NULL), in which case the immediate notify exchange also needs + * to be aborted. + */ + bool aborted; + + /* This is set to true to force the SRR to be rejected. */ + bool reject; +}; + /* Check for Switch reserved address */ #define IS_SW_RESV_ADDR(_s_id) \ ((_s_id.b.domain == 0xff) && ((_s_id.b.area & 0xf0) == 0xf0)) @@ -1056,6 +1121,20 @@ static inline uint32_t sid_to_key(const be_id_t s_id) s_id.al_pa; } +/* + * Free the scatterlist allocated by qlt_set_data_offset(). Call this only if + * cmd->free_sg is set. + */ +static inline void qlt_free_sg(struct qla_tgt_cmd *cmd) +{ + /* + * The scatterlist may be chained to the original scatterlist, but we + * only need to free the first segment here since that is the only part + * allocated by qlt_set_data_offset(). + */ + kfree(cmd->sg); +} + /* * Exported symbols from qla_target.c LLD logic used by qla2xxx code.. */ @@ -1064,6 +1143,7 @@ extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, struct rsp_que *, extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *); extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t); extern int qlt_abort_cmd(struct qla_tgt_cmd *); +void qlt_srr_abort(struct qla_tgt_cmd *cmd, bool reject); void qlt_send_term_exchange(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked); extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *); @@ -1086,6 +1166,7 @@ extern void qlt_81xx_config_nvram_stage2(struct scsi_qla_host *, struct init_cb_81xx *); extern void qlt_81xx_config_nvram_stage1(struct scsi_qla_host *, struct nvram_81xx *); +void qlt_config_nvram_with_fw_version(struct scsi_qla_host *vha); extern void qlt_modify_vp_config(struct scsi_qla_host *, struct vp_config_entry_24xx *); extern void qlt_probe_one_stage1(struct scsi_qla_host *, struct qla_hw_data *); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 169219fe47a2..2fff68935338 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -291,6 +291,16 @@ static struct qla_tgt_cmd *tcm_qla2xxx_get_cmd(struct fc_port *sess) return cmd; } +static int tcm_qla2xxx_get_cmd_ref(struct qla_tgt_cmd *cmd) +{ + return target_get_sess_cmd(&cmd->se_cmd, true); +} + +static void tcm_qla2xxx_put_cmd_ref(struct qla_tgt_cmd *cmd) +{ + target_put_sess_cmd(&cmd->se_cmd); +} + static void tcm_qla2xxx_rel_cmd(struct qla_tgt_cmd *cmd) { target_free_tag(cmd->sess->se_sess, &cmd->se_cmd); @@ -531,6 +541,9 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work) if (cmd->se_cmd.pi_err) transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err); + else if (cmd->srr_failed) + transport_generic_request_failure(&cmd->se_cmd, + TCM_SNACK_REJECTED); else transport_generic_request_failure(&cmd->se_cmd, TCM_CHECK_CONDITION_ABORT_CMD); @@ -1526,6 +1539,8 @@ static const struct qla_tgt_func_tmpl tcm_qla2xxx_template = { .handle_data = tcm_qla2xxx_handle_data, .handle_tmr = tcm_qla2xxx_handle_tmr, .get_cmd = tcm_qla2xxx_get_cmd, + .get_cmd_ref = tcm_qla2xxx_get_cmd_ref, + .put_cmd_ref = tcm_qla2xxx_put_cmd_ref, .rel_cmd = tcm_qla2xxx_rel_cmd, .free_cmd = tcm_qla2xxx_free_cmd, .free_mcmd = tcm_qla2xxx_free_mcmd, -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:50:15
|
(target mode) When qla2xxx is loaded with qlini_mode=disabled, ha->flags.disable_msix_handshake is used before it is set, resulting in the wrong interrupt handler being used on certain HBAs (qla2xxx_msix_rsp_q_hs() is used when qla2xxx_msix_rsp_q() should be used). The only difference between these two interrupt handlers is that the _hs() version writes to a register to clear the "RISC" interrupt, whereas the other version does not. So this bug results in the RISC interrupt being cleared when it should not be. This occasionally causes a different interrupt handler qla24xx_msix_default() for a different vector to see ((stat & HSRX_RISC_INT) == 0) and ignore its interrupt, which then causes problems like: qla2xxx [0000:02:00.0]-d04c:6: MBX Command timeout for cmd 20, iocontrol=8 jiffies=1090c0300 mb[0-3]=[0x4000 0x0 0x40 0xda] mb7 0x500 host_status 0x40000010 hccr 0x3f00 qla2xxx [0000:02:00.0]-101e:6: Mailbox cmd timeout occurred, cmd=0x20, mb[0]=0x20. Scheduling ISP abort (the cmd varies; sometimes it is 0x20, 0x22, 0x54, 0x5a, 0x5d, or 0x6a) This problem can be reproduced with a 16 or 32 Gbps HBA by loading qla2xxx with qlini_mode=disabled and running a high IOPS test while triggering frequent RSCN database change events. While analyzing the problem I discovered that even with disable_msix_handshake forced to 0, it is not necessary to clear the RISC interrupt from qla2xxx_msix_rsp_q_hs() (more below). So just completely remove qla2xxx_msix_rsp_q_hs() and the logic for selecting it, which also fixes the bug with qlini_mode=disabled. The test below describes the justification for not needing qla2xxx_msix_rsp_q_hs(): Force disable_msix_handshake to 0: qla24xx_config_rings(): if (0 && (ha->fw_attributes & BIT_6) && (IS_MSIX_NACK_CAPABLE(ha)) && (ha->flags.msix_enabled)) { In qla24xx_msix_rsp_q() and qla2xxx_msix_rsp_q_hs(), check: (rd_reg_dword(®->host_status) & HSRX_RISC_INT) Count the number of calls to each function with HSRX_RISC_INT set and the number with HSRX_RISC_INT not set while performing some I/O. If qla2xxx_msix_rsp_q_hs() clears the RISC interrupt (original code): qla24xx_msix_rsp_q: 50% of calls have HSRX_RISC_INT set qla2xxx_msix_rsp_q_hs: 5% of calls have HSRX_RISC_INT set (# of qla2xxx_msix_rsp_q_hs interrupts) = (# of qla24xx_msix_rsp_q interrupts) * 3 If qla2xxx_msix_rsp_q_hs() does not clear the RISC interrupt (patched code): qla24xx_msix_rsp_q: 100% of calls have HSRX_RISC_INT set qla2xxx_msix_rsp_q_hs: 9% of calls have HSRX_RISC_INT set (# of qla2xxx_msix_rsp_q_hs interrupts) = (# of qla24xx_msix_rsp_q interrupts) * 3 In the case of the original code, qla24xx_msix_rsp_q() was seeing HSRX_RISC_INT set only 50% of the time because qla2xxx_msix_rsp_q_hs() was clearing it when it shouldn't have been. In the patched code, qla24xx_msix_rsp_q() sees HSRX_RISC_INT set 100% of the time, which makes sense if that interrupt handler needs to clear the RISC interrupt (which it does). qla2xxx_msix_rsp_q_hs() sees HSRX_RISC_INT only 9% of the time, which is just overlap from the other interrupt during the high IOPS test. Tested with SCST on: QLE2742 FW:v9.08.02 (32 Gbps 2-port) QLE2694L FW:v9.10.11 (16 Gbps 4-port) QLE2694L FW:v9.08.02 (16 Gbps 4-port) QLE2672 FW:v8.07.12 (16 Gbps 2-port) both initiator and target mode Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes drivers/scsi/qla2xxx/qla_def.h | 1 - drivers/scsi/qla2xxx/qla_gbl.h | 2 +- drivers/scsi/qla2xxx/qla_isr.c | 32 +++----------------------------- drivers/scsi/qla2xxx/qla_mid.c | 4 +--- 4 files changed, 5 insertions(+), 34 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index cb95b7b12051..b3265952c4be 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3503,7 +3503,6 @@ struct isp_operations { #define QLA_MSIX_RSP_Q 0x01 #define QLA_ATIO_VECTOR 0x02 #define QLA_MSIX_QPAIR_MULTIQ_RSP_Q 0x03 -#define QLA_MSIX_QPAIR_MULTIQ_RSP_Q_HS 0x04 #define QLA_MIDX_DEFAULT 0 #define QLA_MIDX_RSP_Q 1 diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 145defc420f2..55d531c19e6b 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -766,7 +766,7 @@ extern int qla2x00_dfs_remove(scsi_qla_host_t *); /* Globa function prototypes for multi-q */ extern int qla25xx_request_irq(struct qla_hw_data *, struct qla_qpair *, - struct qla_msix_entry *, int); + struct qla_msix_entry *); extern int qla25xx_init_req_que(struct scsi_qla_host *, struct req_que *); extern int qla25xx_init_rsp_que(struct scsi_qla_host *, struct rsp_que *); extern int qla25xx_create_req_que(struct qla_hw_data *, uint16_t, uint8_t, diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index c4c6b5c6658c..a3971afc2dd1 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -4467,32 +4467,6 @@ qla2xxx_msix_rsp_q(int irq, void *dev_id) return IRQ_HANDLED; } -irqreturn_t -qla2xxx_msix_rsp_q_hs(int irq, void *dev_id) -{ - struct qla_hw_data *ha; - struct qla_qpair *qpair; - struct device_reg_24xx __iomem *reg; - unsigned long flags; - - qpair = dev_id; - if (!qpair) { - ql_log(ql_log_info, NULL, 0x505b, - "%s: NULL response queue pointer.\n", __func__); - return IRQ_NONE; - } - ha = qpair->hw; - - reg = &ha->iobase->isp24; - spin_lock_irqsave(&ha->hardware_lock, flags); - wrt_reg_dword(®->hccr, HCCRX_CLR_RISC_INT); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - - queue_work(ha->wq, &qpair->q_work); - - return IRQ_HANDLED; -} - /* Interrupt handling helpers. */ struct qla_init_msix_entry { @@ -4505,7 +4479,6 @@ static const struct qla_init_msix_entry msix_entries[] = { { "rsp_q", qla24xx_msix_rsp_q }, { "atio_q", qla83xx_msix_atio_q }, { "qpair_multiq", qla2xxx_msix_rsp_q }, - { "qpair_multiq_hs", qla2xxx_msix_rsp_q_hs }, }; static const struct qla_init_msix_entry qla82xx_msix_entries[] = { @@ -4792,9 +4765,10 @@ qla2x00_free_irqs(scsi_qla_host_t *vha) } int qla25xx_request_irq(struct qla_hw_data *ha, struct qla_qpair *qpair, - struct qla_msix_entry *msix, int vector_type) + struct qla_msix_entry *msix) { - const struct qla_init_msix_entry *intr = &msix_entries[vector_type]; + const struct qla_init_msix_entry *intr = + &msix_entries[QLA_MSIX_QPAIR_MULTIQ_RSP_Q]; scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev); int ret; diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 8b71ac0b1d99..0abc47e72e0b 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -899,9 +899,7 @@ qla25xx_create_rsp_que(struct qla_hw_data *ha, uint16_t options, rsp->options, rsp->id, rsp->rsp_q_in, rsp->rsp_q_out); - ret = qla25xx_request_irq(ha, qpair, qpair->msix, - ha->flags.disable_msix_handshake ? - QLA_MSIX_QPAIR_MULTIQ_RSP_Q : QLA_MSIX_QPAIR_MULTIQ_RSP_Q_HS); + ret = qla25xx_request_irq(ha, qpair, qpair->msix); if (ret) goto que_failed; -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:50:10
|
(target mode) Print better debug info when terminating a command, and print the response status from the hardware. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes drivers/scsi/qla2xxx/qla_dbg.c | 2 +- drivers/scsi/qla2xxx/qla_target.c | 40 +++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 5136549005e7..ff4124eccc9c 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -54,7 +54,7 @@ * | Misc | 0xd303 | 0xd031-0xd0ff | * | | | 0xd101-0xd1fe | * | | | 0xd214-0xd2fe | - * | Target Mode | 0xe081 | | + * | Target Mode | 0xe084 | | * | Target Mode Management | 0xf09b | 0xf002 | * | | | 0xf046-0xf049 | * | Target Mode Task Management | 0x1000d | | diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index df5c9aac5617..72c74f8f5375 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1909,6 +1909,10 @@ static void qlt_24xx_retry_term_exchange(struct scsi_qla_host *vha, * ABTS response. So, in it ID fields are reversed. */ + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe082, + "qla_target(%d): tag %u: Sending TERM EXCH CTIO for ABTS\n", + vha->vp_idx, le32_to_cpu(entry->exchange_addr_to_abort)); + ctio->entry_type = CTIO_TYPE7; ctio->entry_count = 1; ctio->nport_handle = entry->nport_handle; @@ -3620,16 +3624,24 @@ static int __qlt_send_term_exchange(struct qla_qpair *qpair, { struct scsi_qla_host *vha = qpair->vha; struct ctio7_to_24xx *ctio24; - struct qla_hw_data *ha = vha->hw; request_t *pkt; int ret = 0; uint16_t temp; - ql_dbg(ql_dbg_tgt, vha, 0xe009, "Sending TERM EXCH CTIO (ha=%p)\n", ha); - if (cmd) vha = cmd->vha; + if (cmd) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe009, + "qla_target(%d): tag %lld: Sending TERM EXCH CTIO state %d cmd_sent_to_fw %u\n", + vha->vp_idx, cmd->se_cmd.tag, cmd->state, + cmd->cmd_sent_to_fw); + } else { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe009, + "qla_target(%d): tag %u: Sending TERM EXCH CTIO (no cmd)\n", + vha->vp_idx, le32_to_cpu(atio->u.isp24.exchange_addr)); + } + pkt = (request_t *)qla2x00_alloc_iocbs_ready(qpair, NULL); if (pkt == NULL) { ql_dbg(ql_dbg_tgt, vha, 0xe050, @@ -3920,6 +3932,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, struct se_cmd *se_cmd; struct qla_tgt_cmd *cmd; struct qla_qpair *qpair = rsp->qpair; + uint16_t ctio_flags; if (handle & CTIO_INTERMEDIATE_HANDLE_MARK) { /* That could happen only in case of an error/reset/abort */ @@ -3931,11 +3944,28 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, return; } + ctio_flags = le16_to_cpu(ctio->flags); + cmd = qlt_ctio_to_cmd(vha, rsp, handle, ctio); - if (cmd == NULL) + if (unlikely(cmd == NULL)) { + if ((handle & ~QLA_TGT_HANDLE_MASK) == QLA_TGT_SKIP_HANDLE && + (ctio_flags & 0xe1ff) == (CTIO7_FLAGS_STATUS_MODE_1 | + CTIO7_FLAGS_TERMINATE)) { + u32 tag = le32_to_cpu(ctio->exchange_address); + + if (status == CTIO_SUCCESS) + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe083, + "qla_target(%d): tag %u: term exchange successful\n", + vha->vp_idx, tag); + else + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe084, + "qla_target(%d): tag %u: term exchange failed; status = 0x%x\n", + vha->vp_idx, tag, status); + } return; + } - if ((le16_to_cpu(ctio->flags) & CTIO7_FLAGS_DATA_OUT) && cmd->sess) + if ((ctio_flags & CTIO7_FLAGS_DATA_OUT) && cmd->sess) qlt_chk_edif_rx_sa_delete_pending(vha, cmd->sess, ctio); se_cmd = &cmd->se_cmd; -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:49:39
|
(target mode) - Add the command tag to various messages so that different messages about the same command can be correlated. - For CTIO errors (i.e. when the HW reports an error about a cmd), print the cmd tag, opcode, state, initiator WWPN, and LUN. This info helps an administrator determine what is going wrong. - When a command experiences a transport error, log a message when it is freed. This makes debugging exceptions easier. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: rebased against changes to prior patches (removal of TRC_CTIO_IGNORED). drivers/scsi/qla2xxx/qla_dbg.c | 2 +- drivers/scsi/qla2xxx/qla_target.c | 88 ++++++++++++++++++++++--------- 2 files changed, 64 insertions(+), 26 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index ff4124eccc9c..a19471d51ea5 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -54,7 +54,7 @@ * | Misc | 0xd303 | 0xd031-0xd0ff | * | | | 0xd101-0xd1fe | * | | | 0xd214-0xd2fe | - * | Target Mode | 0xe084 | | + * | Target Mode | 0xe086 | | * | Target Mode Management | 0xf09b | 0xf002 | * | | | 0xf046-0xf049 | * | Target Mode Task Management | 0x1000d | | diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index a71000b122ce..488c003f22c4 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1984,8 +1984,12 @@ static void abort_cmds_for_lun(struct scsi_qla_host *vha, u64 lun, be_id_t s_id) cmd_key = sid_to_key(cmd->atio.u.isp24.fcp_hdr.s_id); cmd_lun = scsilun_to_int( (struct scsi_lun *)&cmd->atio.u.isp24.fcp_cmnd.lun); - if (cmd_key == key && cmd_lun == lun) + if (cmd_key == key && cmd_lun == lun) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe085, + "qla_target(%d): tag %lld: aborted by TMR\n", + vha->vp_idx, cmd->se_cmd.tag); cmd->aborted = 1; + } } spin_unlock_irqrestore(&vha->cmd_list_lock, flags); } @@ -3839,6 +3843,15 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) BUG_ON(cmd->sg_mapped); + if (unlikely(cmd->aborted || + (cmd->trc_flags & (TRC_CTIO_STRANGE | TRC_CTIO_ERR)))) { + ql_dbg(ql_dbg_tgt_mgt, cmd->vha, 0xe086, + "qla_target(%d): tag %lld: free cmd (trc_flags %x, aborted %u, sent_term_exchg %u, rsp_sent %u)\n", + cmd->vha->vp_idx, cmd->se_cmd.tag, + cmd->trc_flags, cmd->aborted, cmd->sent_term_exchg, + cmd->rsp_sent); + } + if (unlikely(cmd->cdb != &cmd->atio.u.isp24.fcp_cmnd.cdb[0])) { kfree(cmd->cdb); cmd->cdb = &cmd->atio.u.isp24.fcp_cmnd.cdb[0]; @@ -3851,7 +3864,6 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) WARN_ON(1); return; } - cmd->jiffies_at_free = get_jiffies_64(); cmd->vha->hw->tgt.tgt_ops->rel_cmd(cmd); } EXPORT_SYMBOL(qlt_free_cmd); @@ -3916,7 +3928,6 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, struct ctio7_from_24xx *ctio) { struct qla_hw_data *ha = vha->hw; - struct se_cmd *se_cmd; struct qla_tgt_cmd *cmd; struct qla_qpair *qpair = rsp->qpair; uint16_t ctio_flags; @@ -3955,12 +3966,12 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, if ((ctio_flags & CTIO7_FLAGS_DATA_OUT) && cmd->sess) qlt_chk_edif_rx_sa_delete_pending(vha, cmd->sess, ctio); - se_cmd = &cmd->se_cmd; cmd->cmd_sent_to_fw = 0; qlt_unmap_sg(vha, cmd); if (unlikely(status != CTIO_SUCCESS)) { + u8 op = cmd->cdb ? cmd->cdb[0] : 0; bool term_exchg = false; /* @@ -3978,8 +3989,10 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, term_exchg = true; if (printk_ratelimit()) dev_info(&vha->hw->pdev->dev, - "qla_target(%d): CTIO with INVALID_RX_ID ATIO attr %x CTIO Flags %x|%x\n", - vha->vp_idx, cmd->atio.u.isp24.attr, + "qla_target(%d): tag %lld, op %x: CTIO with INVALID_RX_ID status 0x%x received (state %d, port %8phC, LUN %lld, ATIO attr %x, CTIO Flags %x|%x)\n", + vha->vp_idx, cmd->se_cmd.tag, op, + status, cmd->state, cmd->sess->port_name, + cmd->unpacked_lun, cmd->atio.u.isp24.attr, ((cmd->ctio_flags >> 9) & 0xf), cmd->ctio_flags); break; @@ -3990,13 +4003,31 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, term_exchg = true; fallthrough; case CTIO_TIMEOUT: + { + const char *status_str; + + switch (status & 0xFFFF) { + case CTIO_LIP_RESET: + status_str = "LIP_RESET"; + break; + case CTIO_TARGET_RESET: + status_str = "TARGET_RESET"; + break; + case CTIO_ABORTED: + status_str = "ABORTED"; + break; + case CTIO_TIMEOUT: + default: + status_str = "TIMEOUT"; + break; + } ql_dbg(ql_dbg_tgt_mgt, vha, 0xf058, - "qla_target(%d): CTIO with " - "status %#x received, state %x, se_cmd %p, " - "(LIP_RESET=e, ABORTED=2, TARGET_RESET=17, " - "TIMEOUT=b, INVALID_RX_ID=8)\n", vha->vp_idx, - status, cmd->state, se_cmd); + "qla_target(%d): tag %lld, op %x: CTIO with %s status 0x%x received (state %d, port %8phC, LUN %lld)\n", + vha->vp_idx, cmd->se_cmd.tag, op, + status_str, status, cmd->state, + cmd->sess->port_name, cmd->unpacked_lun); break; + } case CTIO_PORT_LOGGED_OUT: case CTIO_PORT_UNAVAILABLE: @@ -4005,10 +4036,11 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, (status & 0xFFFF) == CTIO_PORT_LOGGED_OUT; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf059, - "qla_target(%d): CTIO with %s status %x " - "received (state %x, se_cmd %p)\n", vha->vp_idx, + "qla_target(%d): tag %lld, op %x: CTIO with %s status 0x%x received (state %d, port %8phC, LUN %lld)\n", + vha->vp_idx, cmd->se_cmd.tag, op, logged_out ? "PORT LOGGED OUT" : "PORT UNAVAILABLE", - status, cmd->state, se_cmd); + status, cmd->state, cmd->sess->port_name, + cmd->unpacked_lun); term_exchg = true; if (logged_out && cmd->sess) { @@ -4025,14 +4057,15 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, } break; } + case CTIO_DIF_ERROR: { struct ctio_crc_from_fw *crc = (struct ctio_crc_from_fw *)ctio; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf073, - "qla_target(%d): CTIO with DIF_ERROR status %x " - "received (state %x, ulp_cmd %p) actual_dif[0x%llx] " - "expect_dif[0x%llx]\n", - vha->vp_idx, status, cmd->state, se_cmd, + "qla_target(%d): tag %lld, op %x: CTIO with DIF_ERROR status 0x%x received (state %d, port %8phC, LUN %lld, actual_dif[0x%llx] expect_dif[0x%llx])\n", + vha->vp_idx, cmd->se_cmd.tag, op, status, + cmd->state, cmd->sess->port_name, + cmd->unpacked_lun, *((u64 *)&crc->actual_dif[0]), *((u64 *)&crc->expected_dif[0])); @@ -4045,14 +4078,18 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, case CTIO_FAST_INVALID_REQ: case CTIO_FAST_SPI_ERR: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b, - "qla_target(%d): CTIO with EDIF error status 0x%x received (state %x, se_cmd %p\n", - vha->vp_idx, status, cmd->state, se_cmd); + "qla_target(%d): tag %lld, op %x: CTIO with EDIF error status 0x%x received (state %d, port %8phC, LUN %lld)\n", + vha->vp_idx, cmd->se_cmd.tag, op, status, + cmd->state, cmd->sess->port_name, + cmd->unpacked_lun); break; default: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b, - "qla_target(%d): CTIO with error status 0x%x received (state %x, se_cmd %p\n", - vha->vp_idx, status, cmd->state, se_cmd); + "qla_target(%d): tag %lld, op %x: CTIO with error status 0x%x received (state %d, port %8phC, LUN %lld)\n", + vha->vp_idx, cmd->se_cmd.tag, op, status, + cmd->state, cmd->sess->port_name, + cmd->unpacked_lun); break; } @@ -4090,12 +4127,13 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, } else if (cmd->aborted) { cmd->trc_flags |= TRC_CTIO_ABORTED; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf01e, - "Aborted command %p (tag %lld) finished\n", cmd, se_cmd->tag); + "qla_target(%d): tag %lld: Aborted command finished\n", + vha->vp_idx, cmd->se_cmd.tag); } else { cmd->trc_flags |= TRC_CTIO_STRANGE; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05c, - "qla_target(%d): A command in state (%d) should " - "not return a CTIO complete\n", vha->vp_idx, cmd->state); + "qla_target(%d): tag %lld: A command in state (%d) should not return a CTIO complete\n", + vha->vp_idx, cmd->se_cmd.tag, cmd->state); } if (unlikely(status != CTIO_SUCCESS) && -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:48:44
|
This patch applies to the out-of-tree SCST project, not to the Linux kernel. Apply when importing the upstream patch with the same title. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c index e5bf210d3..fb5956346 100644 --- a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c +++ b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c @@ -685,6 +685,8 @@ static void sqa_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd) cmd->state = QLA_TGT_STATE_DONE; cmd->trc_flags |= TRC_CMD_DONE; + if (unlikely(!cmd->rsp_sent)) + scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_FAILED); scst_tgt_cmd_done(scst_cmd, scst_work_context); TRACE_EXIT(); -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:47:22
|
(target mode) Add cmd->rsp_sent to indicate that the SCSI status has been sent successfully, so that SCST can be informed of any transport errors. This will also be used for logging in later patches. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes drivers/scsi/qla2xxx/qla_target.c | 4 ++++ drivers/scsi/qla2xxx/qla_target.h | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index c2876b442a08..a71000b122ce 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -4075,6 +4075,10 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, if (cmd->state == QLA_TGT_STATE_PROCESSED) { cmd->trc_flags |= TRC_CTIO_DONE; + + if (likely(status == CTIO_SUCCESS)) + cmd->rsp_sent = 1; + } else if (cmd->state == QLA_TGT_STATE_NEED_DATA) { cmd->state = QLA_TGT_STATE_DATA_IN; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 97aa6d9cfc27..ab2285c40573 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -882,6 +882,10 @@ struct qla_tgt_cmd { unsigned int conf_compl_supported:1; unsigned int sg_mapped:1; unsigned int write_data_transferred:1; + + /* Set if the SCSI status was sent successfully. */ + unsigned int rsp_sent:1; + unsigned int q_full:1; unsigned int term_exchg:1; unsigned int cmd_sent_to_fw:1; -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:45:51
|
This patch applies to the out-of-tree SCST project, not to the Linux kernel. Apply when importing the upstream patch with the same title. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c index cb58fae20..e5bf210d3 100644 --- a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c +++ b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c @@ -420,16 +420,15 @@ static int sqa_qla2xxx_handle_cmd(scsi_qla_host_t *vha, TRACE_DBG("sqatgt(%ld/%d): Handling command: length=%d, fcp_task_attr=%d, direction=%d, bidirectional=%d lun=%llx cdb=%x tag=%d cmd %p ulpcmd %p\n", vha->host_no, vha->vp_idx, data_length, task_codes, data_dir, bidi, cmd->unpacked_lun, - atio->u.isp24.fcp_cmnd.cdb[0], + cdb[0], atio->u.isp24.exchange_addr, cmd, cmd->scst_cmd); cmd->scst_cmd = scst_rx_cmd(scst_sess, (uint8_t *)&atio->u.isp24.fcp_cmnd.lun, sizeof(atio->u.isp24.fcp_cmnd.lun), - atio->u.isp24.fcp_cmnd.cdb, - sizeof(atio->u.isp24.fcp_cmnd.cdb) + - (atio->u.isp24.fcp_cmnd.add_cdb_len * 4), + cdb, + cmd->cdb_len, SCST_ATOMIC); if (cmd->scst_cmd == NULL) { @@ -1552,7 +1551,6 @@ static int sqa_xmit_response(struct scst_cmd *scst_cmd) scst_to_tgt_dma_dir(scst_cmd_get_data_direction(scst_cmd)); cmd->offset = scst_cmd_get_ppl_offset(scst_cmd); cmd->scsi_status = scst_cmd_get_status(scst_cmd); - cmd->cdb = (unsigned char *) scst_cmd_get_cdb(scst_cmd); cmd->lba = scst_cmd_get_lba(scst_cmd); cmd->trc_flags |= TRC_XMIT_STATUS; @@ -1635,7 +1633,6 @@ static int sqa_rdy_to_xfer(struct scst_cmd *scst_cmd) cmd->dma_data_direction = scst_to_tgt_dma_dir(scst_cmd_get_data_direction(scst_cmd)); - cmd->cdb = scst_cmd_get_cdb(scst_cmd); cmd->sg = scst_cmd_get_sg(scst_cmd); cmd->sg_cnt = scst_cmd_get_sg_cnt(scst_cmd); cmd->scsi_status = scst_cmd_get_status(scst_cmd); -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:43:40
|
(target mode) If handle_tmr() fails: - The code for QLA_TGT_ABTS results in memory-use-after-free and double-free: qlt_do_tmr_work() qlt_build_abts_resp_iocb() qpair->req->outstanding_cmds[h] = (srb_t *)mcmd; mempool_free(mcmd, qla_tgt_mgmt_cmd_mempool); FIRST FREE qlt_handle_abts_completion() mcmd = qlt_ctio_to_cmd() cmd = req->outstanding_cmds[h]; return cmd; vha = mcmd->vha; USE-AFTER-FREE ha->tgt.tgt_ops->free_mcmd(mcmd); SECOND FREE - qlt_send_busy() makes no sense because it sends a SCSI command response instead of a TMR response. Instead just call qlt_xmit_tm_rsp() to send a TMR failed response, since that code is well-tested and handles a number of corner cases. But it would be incorrect to call ha->tgt.tgt_ops->free_mcmd() after handle_tmr() failed, so add a flag to mcmd indicating the proper way to free the mcmd so that qlt_xmit_tm_rsp() can be used for both cases. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: - Change FCP_TMF_REJECTED to FCP_TMF_FAILED. - Add QLA24XX_MGMT_LLD_OWNED and qlt_free_ul_mcmd(). - Improve patch description. drivers/scsi/qla2xxx/qla_os.c | 2 +- drivers/scsi/qla2xxx/qla_target.c | 54 +++++++++++++------------------ drivers/scsi/qla2xxx/qla_target.h | 2 ++ 3 files changed, 25 insertions(+), 33 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 2a3eb1dacf86..64387224f28a 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1893,7 +1893,7 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) * Currently, only ABTS response gets on the * outstanding_cmds[] */ - ha->tgt.tgt_ops->free_mcmd( + qlt_free_ul_mcmd(ha, (struct qla_tgt_mgmt_cmd *) sp); break; default: diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 849ab256807b..69ccba3436ec 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2005,7 +2005,6 @@ static void qlt_do_tmr_work(struct work_struct *work) struct qla_hw_data *ha = mcmd->vha->hw; int rc; uint32_t tag; - unsigned long flags; switch (mcmd->tmr_func) { case QLA_TGT_ABTS: @@ -2020,34 +2019,12 @@ static void qlt_do_tmr_work(struct work_struct *work) mcmd->tmr_func, tag); if (rc != 0) { - spin_lock_irqsave(mcmd->qpair->qp_lock_ptr, flags); - switch (mcmd->tmr_func) { - case QLA_TGT_ABTS: - mcmd->fc_tm_rsp = FCP_TMF_REJECTED; - qlt_build_abts_resp_iocb(mcmd); - break; - case QLA_TGT_LUN_RESET: - case QLA_TGT_CLEAR_TS: - case QLA_TGT_ABORT_TS: - case QLA_TGT_CLEAR_ACA: - case QLA_TGT_TARGET_RESET: - qlt_send_busy(mcmd->qpair, &mcmd->orig_iocb.atio, - qla_sam_status); - break; - - case QLA_TGT_ABORT_ALL: - case QLA_TGT_NEXUS_LOSS_SESS: - case QLA_TGT_NEXUS_LOSS: - qlt_send_notify_ack(mcmd->qpair, - &mcmd->orig_iocb.imm_ntfy, 0, 0, 0, 0, 0, 0); - break; - } - spin_unlock_irqrestore(mcmd->qpair->qp_lock_ptr, flags); - ql_dbg(ql_dbg_tgt_mgt, mcmd->vha, 0xf052, "qla_target(%d): tgt_ops->handle_tmr() failed: %d\n", mcmd->vha->vp_idx, rc); - mempool_free(mcmd, qla_tgt_mgmt_cmd_mempool); + mcmd->flags |= QLA24XX_MGMT_LLD_OWNED; + mcmd->fc_tm_rsp = FCP_TMF_FAILED; + qlt_xmit_tm_rsp(mcmd); } } @@ -2234,6 +2211,19 @@ void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *mcmd) } EXPORT_SYMBOL(qlt_free_mcmd); +/* + * If the upper layer knows about this mgmt cmd, then call its ->free_cmd() + * callback, which will eventually call qlt_free_mcmd(). Otherwise, call + * qlt_free_mcmd() directly. + */ +void qlt_free_ul_mcmd(struct qla_hw_data *ha, struct qla_tgt_mgmt_cmd *mcmd) +{ + if (mcmd->flags & QLA24XX_MGMT_LLD_OWNED) + qlt_free_mcmd(mcmd); + else + ha->tgt.tgt_ops->free_mcmd(mcmd); +} + /* * ha->hardware_lock supposed to be held on entry. Might drop it, then * reacquire @@ -2326,12 +2316,12 @@ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) "RESET-TMR online/active/old-count/new-count = %d/%d/%d/%d.\n", vha->flags.online, qla2x00_reset_active(vha), mcmd->reset_count, qpair->chip_reset); - ha->tgt.tgt_ops->free_mcmd(mcmd); + qlt_free_ul_mcmd(ha, mcmd); spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); return; } - if (mcmd->flags == QLA24XX_MGMT_SEND_NACK) { + if (mcmd->flags & QLA24XX_MGMT_SEND_NACK) { switch (mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode) { case ELS_LOGO: case ELS_PRLO: @@ -2364,7 +2354,7 @@ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) * qlt_xmit_tm_rsp() returns here.. */ if (free_mcmd) - ha->tgt.tgt_ops->free_mcmd(mcmd); + qlt_free_ul_mcmd(ha, mcmd); spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); } @@ -5742,7 +5732,7 @@ static void qlt_handle_abts_completion(struct scsi_qla_host *vha, if (le32_to_cpu(entry->error_subcode1) == 0x1E && le32_to_cpu(entry->error_subcode2) == 0) { if (qlt_chk_unresolv_exchg(vha, rsp->qpair, entry)) { - ha->tgt.tgt_ops->free_mcmd(mcmd); + qlt_free_ul_mcmd(ha, mcmd); return; } qlt_24xx_retry_term_exchange(vha, rsp->qpair, @@ -5753,10 +5743,10 @@ static void qlt_handle_abts_completion(struct scsi_qla_host *vha, vha->vp_idx, entry->compl_status, entry->error_subcode1, entry->error_subcode2); - ha->tgt.tgt_ops->free_mcmd(mcmd); + qlt_free_ul_mcmd(ha, mcmd); } } else if (mcmd) { - ha->tgt.tgt_ops->free_mcmd(mcmd); + qlt_free_ul_mcmd(ha, mcmd); } } diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index eb15d8e9f79e..223c40bc9498 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -966,6 +966,7 @@ struct qla_tgt_mgmt_cmd { unsigned int flags; #define QLA24XX_MGMT_SEND_NACK BIT_0 #define QLA24XX_MGMT_ABORT_IO_ATTR_VALID BIT_1 +#define QLA24XX_MGMT_LLD_OWNED BIT_2 uint32_t reset_count; struct work_struct work; uint64_t unpacked_lun; @@ -1059,6 +1060,7 @@ extern int qlt_abort_cmd(struct qla_tgt_cmd *); void qlt_send_term_exchange(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked); extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *); +void qlt_free_ul_mcmd(struct qla_hw_data *ha, struct qla_tgt_mgmt_cmd *mcmd); extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *); extern void qlt_free_cmd(struct qla_tgt_cmd *cmd); extern void qlt_unmap_sg(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd); -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:42:18
|
(target mode) Similar fixes to both functions: qlt_xmit_response: - If the cmd cannot be processed, remember to call ->free_cmd() to prevent the target-mode midlevel from seeing a cmd lockup. - Do not try to send the response if the exchange has been terminated. - Check for chip reset once after lock instead of both before and after lock. - Give errors from qlt_pre_xmit_response() a lower priority to compensate for removing the first check for chip reset. qlt_rdy_to_xfer: - Check for chip reset after lock instead of before lock to avoid races. - Do not try to receive data if the exchange has been terminated. - Give errors from qlt_pci_map_calc_cnt() a lower priority to compensate for moving the check for chip reset. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes drivers/scsi/qla2xxx/qla_target.c | 86 +++++++++++++++++-------------- 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index c6dc5e9efb69..849ab256807b 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3206,12 +3206,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, uint32_t full_req_cnt = 0; unsigned long flags = 0; int res; - - if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) || - (cmd->sess && cmd->sess->deleted)) { - cmd->state = QLA_TGT_STATE_PROCESSED; - return 0; - } + int pre_xmit_res; ql_dbg_qp(ql_dbg_tgt, qpair, 0xe018, "is_send_status=%d, cmd->bufflen=%d, cmd->sg_cnt=%d, cmd->dma_data_direction=%d se_cmd[%p] qp %d\n", @@ -3219,33 +3214,39 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, 1 : 0, cmd->bufflen, cmd->sg_cnt, cmd->dma_data_direction, &cmd->se_cmd, qpair->id); - res = qlt_pre_xmit_response(cmd, &prm, xmit_type, scsi_status, + pre_xmit_res = qlt_pre_xmit_response(cmd, &prm, xmit_type, scsi_status, &full_req_cnt); - if (unlikely(res != 0)) { - return res; - } + /* + * Check pre_xmit_res later because we want to check other errors + * first. + */ spin_lock_irqsave(qpair->qp_lock_ptr, flags); + if (unlikely(cmd->sent_term_exchg || + cmd->sess->deleted || + !qpair->fw_started || + cmd->reset_count != qpair->chip_reset)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe101, + "qla_target(%d): tag %lld: skipping send response for aborted cmd\n", + vha->vp_idx, cmd->se_cmd.tag); + qlt_unmap_sg(vha, cmd); + cmd->state = QLA_TGT_STATE_PROCESSED; + vha->hw->tgt.tgt_ops->free_cmd(cmd); + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + return 0; + } + + /* Check for errors from qlt_pre_xmit_response(). */ + res = pre_xmit_res; + if (unlikely(res)) + goto out_unmap_unlock; + if (xmit_type == QLA_TGT_XMIT_STATUS) qpair->tgt_counters.core_qla_snd_status++; else qpair->tgt_counters.core_qla_que_buf++; - if (!qpair->fw_started || cmd->reset_count != qpair->chip_reset) { - /* - * Either the port is not online or this request was from - * previous life, just abort the processing. - */ - cmd->state = QLA_TGT_STATE_PROCESSED; - ql_dbg_qp(ql_dbg_async, qpair, 0xe101, - "RESET-RSP online/active/old-count/new-count = %d/%d/%d/%d.\n", - vha->flags.online, qla2x00_reset_active(vha), - cmd->reset_count, qpair->chip_reset); - res = 0; - goto out_unmap_unlock; - } - /* Does F/W have an IOCBs for this request */ res = qlt_check_reserve_free_req(qpair, full_req_cnt); if (unlikely(res)) @@ -3360,6 +3361,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) struct qla_tgt_prm prm; unsigned long flags = 0; int res = 0; + int pci_map_res; struct qla_qpair *qpair = cmd->qpair; memset(&prm, 0, sizeof(prm)); @@ -3368,28 +3370,36 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) prm.sg = NULL; prm.req_cnt = 1; - if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) || - (cmd->sess && cmd->sess->deleted)) { - /* - * Either the port is not online or this request was from - * previous life, just abort the processing. - */ + /* Calculate number of entries and segments required */ + pci_map_res = qlt_pci_map_calc_cnt(&prm); + /* + * Check pci_map_res later because we want to check other errors first. + */ + + spin_lock_irqsave(qpair->qp_lock_ptr, flags); + + if (unlikely(cmd->sent_term_exchg || + cmd->sess->deleted || + !qpair->fw_started || + cmd->reset_count != qpair->chip_reset)) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xe102, + "qla_target(%d): tag %lld: skipping data-out for aborted cmd\n", + vha->vp_idx, cmd->se_cmd.tag); + qlt_unmap_sg(vha, cmd); cmd->aborted = 1; cmd->write_data_transferred = 0; cmd->state = QLA_TGT_STATE_DATA_IN; vha->hw->tgt.tgt_ops->handle_data(cmd); - ql_dbg_qp(ql_dbg_async, qpair, 0xe102, - "RESET-XFR online/active/old-count/new-count = %d/%d/%d/%d.\n", - vha->flags.online, qla2x00_reset_active(vha), - cmd->reset_count, qpair->chip_reset); + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); return 0; } - /* Calculate number of entries and segments required */ - if (qlt_pci_map_calc_cnt(&prm) != 0) - return -EAGAIN; + /* Check for errors from qlt_pci_map_calc_cnt(). */ + if (unlikely(pci_map_res != 0)) { + res = -EAGAIN; + goto out_unlock_free_unmap; + } - spin_lock_irqsave(qpair->qp_lock_ptr, flags); /* Does F/W have an IOCBs for this request */ res = qlt_check_reserve_free_req(qpair, prm.req_cnt); if (res != 0) -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:41:19
|
This patch applies to the out-of-tree SCST project, not to the Linux kernel. Apply when importing the upstream patch with the same title. SCST addendum: sqa_on_hw_pending_cmd_timeout() currently unmaps DMA, sets outstanding_cmds[h] to NULL, and forces the command to complete. This could cause a kernel crash if the HW later accesses the DMA mapping. It can also cause other problems if outstanding_cmds[h] is reused for a different command. Fix by doing this instead: - In sqa_on_hw_pending_cmd_timeout(), call qlt_send_term_exchange() first and then restart the timeout. After another timeout, reset the ISP. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: - On second timeout, reset the ISP rather than unmapping DMA that might be in use by the hardware. - Apply "scsi: qla2xxx: clear cmds after chip reset" from Dmitry Bogdanov as prerequisite. This is required for the ISP reset to clear the locked-up command. - Move the revert of 26f9ce53817a ("scsi: qla2xxx: Fix missed DMA unmap for aborted commands") from this patch to the previous patch since that patch fixed the oops, even though this patch is still necessary for its other improvements. Rename this patch and reword the patch description to match. - Remove TRC_CTIO_IGNORED. qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c | 136 ++++++++++-------- 1 file changed, 76 insertions(+), 60 deletions(-) diff --git a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c index e885b9711..07aee6e81 100644 --- a/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c +++ b/qla2x00t-32gbit/qla2x00-target/scst_qla2xxx.c @@ -187,6 +187,7 @@ static struct cmd_state_name { {QLA_TGT_STATE_NEED_DATA, "NeedData"}, {QLA_TGT_STATE_DATA_IN, "DataIn"}, {QLA_TGT_STATE_PROCESSED, "Processed"}, + {QLA_TGT_STATE_DONE, "Done"}, }; static char *cmdstate_to_str(uint8_t state) @@ -497,23 +498,14 @@ static void sqa_qla2xxx_handle_data(struct qla_tgt_cmd *cmd) { struct scst_cmd *scst_cmd = cmd->scst_cmd; int rx_status; - unsigned long flags; TRACE_ENTRY(); - spin_lock_irqsave(&cmd->cmd_lock, flags); - if (cmd->aborted) { - spin_unlock_irqrestore(&cmd->cmd_lock, flags); - + if (unlikely(cmd->aborted)) { scst_set_cmd_error(scst_cmd, SCST_LOAD_SENSE(scst_sense_internal_failure)); - scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET, - SCST_CONTEXT_THREAD); - return; - } - spin_unlock_irqrestore(&cmd->cmd_lock, flags); - - if (cmd->write_data_transferred) { + rx_status = SCST_RX_STATUS_ERROR_SENSE_SET; + } else if (likely(cmd->write_data_transferred)) { rx_status = SCST_RX_STATUS_SUCCESS; } else { rx_status = SCST_RX_STATUS_ERROR_SENSE_SET; @@ -691,6 +683,7 @@ static void sqa_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd) TRACE_ENTRY(); + cmd->state = QLA_TGT_STATE_DONE; cmd->trc_flags |= TRC_CMD_DONE; scst_tgt_cmd_done(scst_cmd, scst_work_context); @@ -1522,9 +1515,10 @@ static int sqa_xmit_response(struct scst_cmd *scst_cmd) cmd = scst_cmd_get_tgt_priv(scst_cmd); if (scst_cmd_aborted_on_xmit(scst_cmd)) { - TRACE_MGMT_DBG("sqatgt(%ld/%d): CMD_ABORTED cmd[%p]", - cmd->vha->host_no, cmd->vha->vp_idx, - cmd); + TRACE_MGMT_DBG( + "sqatgt(%ld/%d): tag %lld: skipping send response for aborted cmd", + cmd->vha->host_no, cmd->vha->vp_idx, + scst_cmd_get_tag(scst_cmd)); qlt_abort_cmd(cmd); scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_ABORTED); scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_DIRECT); @@ -1841,72 +1835,94 @@ static int sqa_qla2xxx_dif_tags(struct qla_tgt_cmd *cmd, return t32; } -static void sqa_cleanup_hw_pending_cmd(scsi_qla_host_t *vha, - struct qla_tgt_cmd *cmd) -{ - uint32_t h; - struct qla_qpair *qpair = cmd->qpair; - - for (h = 1; h < qpair->req->num_outstanding_cmds; h++) { - if (qpair->req->outstanding_cmds[h] == (srb_t *)cmd) { - printk(KERN_INFO "Clearing handle %d for cmd %p", h, - cmd); - //TRACE_DBG("Clearing handle %d for cmd %p", h, cmd); - qpair->req->outstanding_cmds[h] = NULL; - break; - } - } -} - static void sqa_on_hw_pending_cmd_timeout(struct scst_cmd *scst_cmd) { struct qla_tgt_cmd *cmd = scst_cmd_get_tgt_priv(scst_cmd); struct scsi_qla_host *vha = cmd->vha; struct qla_qpair *qpair = cmd->qpair; - uint8_t aborted = cmd->aborted; unsigned long flags; TRACE_ENTRY(); - TRACE_MGMT_DBG("sqatgt(%ld/%d): Cmd %p HW pending for too long (state %s) %s; %s;", - vha->host_no, vha->vp_idx, cmd, - cmdstate_to_str((uint8_t)cmd->state), - cmd->cmd_sent_to_fw ? "sent to fw" : "not sent to fw", - aborted ? "aborted" : "not aborted"); - - qlt_abort_cmd(cmd); + scst_cmd_get(scst_cmd); spin_lock_irqsave(qpair->qp_lock_ptr, flags); + + TRACE_MGMT_DBG( + "sqatgt(%ld/%d): tag %lld: HW pending for too long (state %s) %s; %s", + vha->host_no, vha->vp_idx, scst_cmd_get_tag(scst_cmd), + cmdstate_to_str((uint8_t)cmd->state), + cmd->cmd_sent_to_fw ? "sent to fw" : "not sent to fw", + cmd->aborted ? "aborted" : "not aborted"); + switch (cmd->state) { case QLA_TGT_STATE_NEW: case QLA_TGT_STATE_DATA_IN: - PRINT_ERROR("sqa(%ld): A command in state (%s) should not be HW pending. %s", - vha->host_no, cmdstate_to_str((uint8_t)cmd->state), - aborted ? "aborted" : "not aborted"); - break; + case QLA_TGT_STATE_DONE: + PRINT_ERROR( + "sqatgt(%ld/%d): tag %lld: A command in state (%s) should not be HW pending. %s", + vha->host_no, vha->vp_idx, scst_cmd_get_tag(scst_cmd), + cmdstate_to_str((uint8_t)cmd->state), + cmd->aborted ? "aborted" : "not aborted"); + goto out_unlock; case QLA_TGT_STATE_NEED_DATA: - /* the abort will nudge it out of FW */ - TRACE_MGMT_DBG("Force rx_data cmd %p", cmd); - sqa_cleanup_hw_pending_cmd(vha, cmd); - scst_set_cmd_error(scst_cmd, - SCST_LOAD_SENSE(scst_sense_internal_failure)); - scst_rx_data(scst_cmd, SCST_RX_STATUS_ERROR_SENSE_SET, - SCST_CONTEXT_THREAD); - break; case QLA_TGT_STATE_PROCESSED: - if (!cmd->cmd_sent_to_fw) - PRINT_ERROR("sqa(%ld): command should not be in HW pending. It's already processed. ", - vha->host_no); - else - TRACE_MGMT_DBG("Force finishing cmd %p", cmd); - sqa_cleanup_hw_pending_cmd(vha, cmd); - scst_set_delivery_status(scst_cmd, SCST_CMD_DELIVERY_FAILED); - scst_tgt_cmd_done(scst_cmd, SCST_CONTEXT_THREAD); break; } + + /* Handle race with normal CTIO completion. */ + if (!cmd->cmd_sent_to_fw) { + TRACE_MGMT_DBG( + "sqatgt(%ld/%d): tag %lld: cmd not sent to fw; assuming just completed", + vha->host_no, vha->vp_idx, + scst_cmd_get_tag(scst_cmd)); + goto out_unlock; + } + + /* The command should be aborted elsewhere if the ISP was reset. */ + if (!qpair->fw_started || cmd->reset_count != qpair->chip_reset) + goto out_unlock; + + /* Reset the ISP if there was a timeout after sending a term exchange. */ + if (cmd->sent_term_exchg && + time_is_before_jiffies(cmd->jiffies_at_term_exchg + + SQA_MAX_HW_PENDING_TIME * HZ / 2)) { + if (!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags)) { + if (IS_P3P_TYPE(vha->hw)) + set_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags); + else + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } + goto out_unlock; + } + + /* + * We still expect a CTIO response from the hw. Terminating the + * exchange should force the CTIO response to happen sooner. + */ + if (!cmd->sent_term_exchg) + qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1); + + /* + * Restart the timer so that this function is called again + * after another timeout. This is similar to + * scst_update_hw_pending_start() except that we also set + * cmd_hw_pending to 1. + * + * IRQs are already OFF. + */ + spin_lock(&scst_cmd->sess->sess_list_lock); + scst_cmd->cmd_hw_pending = 1; + scst_cmd->hw_pending_start = jiffies; + spin_unlock(&scst_cmd->sess->sess_list_lock); + +out_unlock: spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + scst_cmd_put(scst_cmd); + TRACE_EXIT(); } -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:40:26
|
(target mode) cmd->cmd_lock only protects cmd->aborted, but when deciding how to process a cmd, it is necessary to consider other factors such as cmd->state and if the chip has been reset, which are protected by qpair->qp_lock_ptr. So replace cmd_lock with qp_lock_ptr, whick makes it possible to check additional values and make decisions about what to do without racing with the CTIO handler and other code. - Lock cmd->qpair->qp_lock_ptr when aborting a cmd. - Eliminate cmd->cmd_lock and change cmd->aborted to a bitfield since it is now protected by qp_lock_ptr just like all the other flags. - Add another command state QLA_TGT_STATE_DONE to avoid any possible races between qlt_abort_cmd() and tgt_ops->free_cmd(). - Add the cmd->sent_term_exchg flag to indicate if qlt_send_term_exchange() has already been called. - Export qlt_send_term_exchange() for SCST so that it can be called directly instead of trying to make qlt_abort_cmd() work for both TMR abort and HW timeout. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: - On second timeout, reset the ISP rather than unmapping DMA that might be in use by the hardware. - Apply "scsi: qla2xxx: clear cmds after chip reset" from Dmitry Bogdanov as prerequisite. This is required for the ISP reset to clear the locked-up command. - Move the revert of 26f9ce53817a ("scsi: qla2xxx: Fix missed DMA unmap for aborted commands") from this patch to the previous patch since that patch fixed the oops, even though this patch is still necessary for its other improvements. Rename this patch and reword the patch description to match. - No longer export qlt_unmap_sg(). - Export qla2xxx_wake_dpc(). - Remove TRC_CTIO_IGNORED. drivers/scsi/qla2xxx/qla_os.c | 1 + drivers/scsi/qla2xxx/qla_target.c | 198 +++++++++++++---------------- drivers/scsi/qla2xxx/qla_target.h | 17 ++- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 2 + 4 files changed, 102 insertions(+), 116 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 739137ddfd68..2a3eb1dacf86 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -7248,6 +7248,7 @@ qla2xxx_wake_dpc(struct scsi_qla_host *vha) if (!test_bit(UNLOADING, &vha->dpc_flags) && t) wake_up_process(t); } +EXPORT_SYMBOL(qla2xxx_wake_dpc); /* * qla2x00_rst_aen diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 2abdb8ce0302..c6dc5e9efb69 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -104,8 +104,6 @@ static void qlt_response_pkt(struct scsi_qla_host *ha, struct rsp_que *rsp, response_t *pkt); static int qlt_issue_task_mgmt(struct fc_port *sess, u64 lun, int fn, void *iocb, int flags); -static void qlt_send_term_exchange(struct qla_qpair *, struct qla_tgt_cmd - *cmd, struct atio_from_isp *atio, int ha_locked, int ul_abort); static void qlt_alloc_qfull_cmd(struct scsi_qla_host *vha, struct atio_from_isp *atio, uint16_t status, int qfull); static void qlt_disable_vha(struct scsi_qla_host *vha); @@ -136,20 +134,6 @@ static struct workqueue_struct *qla_tgt_wq; static DEFINE_MUTEX(qla_tgt_mutex); static LIST_HEAD(qla_tgt_glist); -static const char *prot_op_str(u32 prot_op) -{ - switch (prot_op) { - case TARGET_PROT_NORMAL: return "NORMAL"; - case TARGET_PROT_DIN_INSERT: return "DIN_INSERT"; - case TARGET_PROT_DOUT_INSERT: return "DOUT_INSERT"; - case TARGET_PROT_DIN_STRIP: return "DIN_STRIP"; - case TARGET_PROT_DOUT_STRIP: return "DOUT_STRIP"; - case TARGET_PROT_DIN_PASS: return "DIN_PASS"; - case TARGET_PROT_DOUT_PASS: return "DOUT_PASS"; - default: return "UNKNOWN"; - } -} - /* This API intentionally takes dest as a parameter, rather than returning * int value to avoid caller forgetting to issue wmb() after the store */ void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest) @@ -252,7 +236,7 @@ static void qlt_queue_unknown_atio(scsi_qla_host_t *vha, return; out_term: - qlt_send_term_exchange(vha->hw->base_qpair, NULL, atio, ha_locked, 0); + qlt_send_term_exchange(vha->hw->base_qpair, NULL, atio, ha_locked); goto out; } @@ -271,7 +255,7 @@ static void qlt_try_to_dequeue_unknown_atios(struct scsi_qla_host *vha, "Freeing unknown %s %p, because of Abort\n", "ATIO_TYPE7", u); qlt_send_term_exchange(vha->hw->base_qpair, NULL, - &u->atio, ha_locked, 0); + &u->atio, ha_locked); goto abort; } @@ -285,7 +269,7 @@ static void qlt_try_to_dequeue_unknown_atios(struct scsi_qla_host *vha, "Freeing unknown %s %p, because tgt is being stopped\n", "ATIO_TYPE7", u); qlt_send_term_exchange(vha->hw->base_qpair, NULL, - &u->atio, ha_locked, 0); + &u->atio, ha_locked); } else { ql_dbg(ql_dbg_async + ql_dbg_verbose, vha, 0x503d, "Reschedule u %p, vha %p, host %p\n", u, vha, host); @@ -3461,7 +3445,6 @@ qlt_handle_dif_error(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd, uint8_t *ep = &sts->expected_dif[0]; uint64_t lba = cmd->se_cmd.t_task_lba; uint8_t scsi_status, sense_key, asc, ascq; - unsigned long flags; struct scsi_qla_host *vha = cmd->vha; cmd->trc_flags |= TRC_DIF_ERR; @@ -3535,13 +3518,10 @@ qlt_handle_dif_error(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd, vha->hw->tgt.tgt_ops->handle_data(cmd); break; default: - spin_lock_irqsave(&cmd->cmd_lock, flags); - if (cmd->aborted) { - spin_unlock_irqrestore(&cmd->cmd_lock, flags); + if (cmd->sent_term_exchg) { vha->hw->tgt.tgt_ops->free_cmd(cmd); break; } - spin_unlock_irqrestore(&cmd->cmd_lock, flags); qlt_send_resp_ctio(qpair, cmd, scsi_status, sense_key, asc, ascq); @@ -3696,9 +3676,22 @@ static int __qlt_send_term_exchange(struct qla_qpair *qpair, return 0; } -static void qlt_send_term_exchange(struct qla_qpair *qpair, - struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked, - int ul_abort) +/* + * Aborting a command that is active in the FW (i.e. cmd->cmd_sent_to_fw == 1) + * will usually trigger the FW to send a completion CTIO with error status, + * and the driver will then call the ->handle_data() or ->free_cmd() callbacks. + * This can be used to clear a command that is locked up in the FW unless there + * is something more seriously wrong. + * + * Aborting a command that is not active in the FW (i.e. + * cmd->cmd_sent_to_fw == 0) will not directly trigger any callbacks. Instead, + * when the target mode midlevel calls qlt_rdy_to_xfer() or + * qlt_xmit_response(), the driver will see that the cmd has been aborted and + * call the appropriate callback immediately without performing the requested + * operation. + */ +void qlt_send_term_exchange(struct qla_qpair *qpair, + struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked) { struct scsi_qla_host *vha; unsigned long flags = 0; @@ -3722,10 +3715,14 @@ static void qlt_send_term_exchange(struct qla_qpair *qpair, qlt_alloc_qfull_cmd(vha, atio, 0, 0); done: - if (cmd && !ul_abort && !cmd->aborted) { - if (cmd->sg_mapped) - qlt_unmap_sg(vha, cmd); - vha->hw->tgt.tgt_ops->free_cmd(cmd); + if (cmd) { + /* + * Set this even if -ENOMEM above, since term exchange will be + * sent eventually... + */ + cmd->sent_term_exchg = 1; + cmd->aborted = 1; + cmd->jiffies_at_term_exchg = jiffies; } if (!ha_locked) @@ -3733,6 +3730,7 @@ static void qlt_send_term_exchange(struct qla_qpair *qpair, return; } +EXPORT_SYMBOL(qlt_send_term_exchange); static void qlt_init_term_exchange(struct scsi_qla_host *vha) { @@ -3783,35 +3781,35 @@ static void qlt_chk_exch_leak_thresh_hold(struct scsi_qla_host *vha) int qlt_abort_cmd(struct qla_tgt_cmd *cmd) { - struct qla_tgt *tgt = cmd->tgt; - struct scsi_qla_host *vha = tgt->vha; - struct se_cmd *se_cmd = &cmd->se_cmd; + struct scsi_qla_host *vha = cmd->vha; + struct qla_qpair *qpair = cmd->qpair; unsigned long flags; - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, - "qla_target(%d): terminating exchange for aborted cmd=%p " - "(se_cmd=%p, tag=%llu)", vha->vp_idx, cmd, &cmd->se_cmd, - se_cmd->tag); + spin_lock_irqsave(qpair->qp_lock_ptr, flags); - spin_lock_irqsave(&cmd->cmd_lock, flags); - if (cmd->aborted) { - spin_unlock_irqrestore(&cmd->cmd_lock, flags); - /* - * It's normal to see 2 calls in this path: - * 1) XFER Rdy completion + CMD_T_ABORT - * 2) TCM TMR - drain_state_list - */ - ql_dbg(ql_dbg_tgt_mgt, vha, 0xf016, - "multiple abort. %p transport_state %x, t_state %x, " - "se_cmd_flags %x\n", cmd, cmd->se_cmd.transport_state, - cmd->se_cmd.t_state, cmd->se_cmd.se_cmd_flags); - return -EIO; + ql_dbg(ql_dbg_tgt_mgt, vha, 0xf014, + "qla_target(%d): tag %lld: cmd being aborted (state %d) %s; %s\n", + vha->vp_idx, cmd->se_cmd.tag, cmd->state, + cmd->cmd_sent_to_fw ? "sent to fw" : "not sent to fw", + cmd->aborted ? "aborted" : "not aborted"); + + if (cmd->state != QLA_TGT_STATE_DONE && !cmd->sent_term_exchg) { + if (!qpair->fw_started || + cmd->reset_count != qpair->chip_reset) { + /* + * Chip was reset; just pretend that we sent the term + * exchange. + */ + cmd->sent_term_exchg = 1; + cmd->aborted = 1; + cmd->jiffies_at_term_exchg = jiffies; + } else { + qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1); + } } - cmd->aborted = 1; - cmd->trc_flags |= TRC_ABORT; - spin_unlock_irqrestore(&cmd->cmd_lock, flags); - qlt_send_term_exchange(cmd->qpair, cmd, &cmd->atio, 0, 1); + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + return 0; } EXPORT_SYMBOL(qlt_abort_cmd); @@ -3842,40 +3840,6 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) } EXPORT_SYMBOL(qlt_free_cmd); -/* - * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire - */ -static int qlt_term_ctio_exchange(struct qla_qpair *qpair, void *ctio, - struct qla_tgt_cmd *cmd, uint32_t status) -{ - int term = 0; - struct scsi_qla_host *vha = qpair->vha; - - if (cmd->se_cmd.prot_op) - ql_dbg(ql_dbg_tgt_dif, vha, 0xe013, - "Term DIF cmd: lba[0x%llx|%lld] len[0x%x] " - "se_cmd=%p tag[%x] op %#x/%s", - cmd->lba, cmd->lba, - cmd->num_blks, &cmd->se_cmd, - cmd->atio.u.isp24.exchange_addr, - cmd->se_cmd.prot_op, - prot_op_str(cmd->se_cmd.prot_op)); - - if (ctio != NULL) { - struct ctio7_from_24xx *c = (struct ctio7_from_24xx *)ctio; - - term = !(c->flags & - cpu_to_le16(OF_TERM_EXCH)); - } else - term = 1; - - if (term) - qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1, 0); - - return term; -} - - /* ha->hardware_lock supposed to be held on entry */ static void *qlt_ctio_to_cmd(struct scsi_qla_host *vha, struct rsp_que *rsp, uint32_t handle, void *ctio) @@ -3981,22 +3945,35 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, qlt_unmap_sg(vha, cmd); if (unlikely(status != CTIO_SUCCESS)) { + bool term_exchg = false; + + /* + * If the hardware terminated the exchange, then we don't need + * to send an explicit term exchange message. + */ + if (ctio_flags & OF_TERM_EXCH) { + cmd->sent_term_exchg = 1; + cmd->aborted = 1; + cmd->jiffies_at_term_exchg = jiffies; + } + switch (status & 0xFFFF) { case CTIO_INVALID_RX_ID: + term_exchg = true; if (printk_ratelimit()) dev_info(&vha->hw->pdev->dev, "qla_target(%d): CTIO with INVALID_RX_ID ATIO attr %x CTIO Flags %x|%x\n", vha->vp_idx, cmd->atio.u.isp24.attr, ((cmd->ctio_flags >> 9) & 0xf), cmd->ctio_flags); - break; + case CTIO_LIP_RESET: case CTIO_TARGET_RESET: case CTIO_ABORTED: - /* driver request abort via Terminate exchange */ + term_exchg = true; + fallthrough; case CTIO_TIMEOUT: - /* They are OK */ ql_dbg(ql_dbg_tgt_mgt, vha, 0xf058, "qla_target(%d): CTIO with " "status %#x received, state %x, se_cmd %p, " @@ -4017,6 +3994,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, logged_out ? "PORT LOGGED OUT" : "PORT UNAVAILABLE", status, cmd->state, se_cmd); + term_exchg = true; if (logged_out && cmd->sess) { /* * Session is already logged out, but we need @@ -4062,19 +4040,21 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, break; } + cmd->trc_flags |= TRC_CTIO_ERR; - /* "cmd->aborted" means - * cmd is already aborted/terminated, we don't - * need to terminate again. The exchange is already - * cleaned up/freed at FW level. Just cleanup at driver - * level. + /* + * In state QLA_TGT_STATE_NEED_DATA the failed CTIO was for + * Data-Out, so either abort the exchange or try sending check + * condition with sense data depending on the severity of + * the error. In state QLA_TGT_STATE_PROCESSED the failed CTIO + * was for status (and possibly Data-In), so don't try sending + * an error status again in that case (if the error was for + * Data-In with status, we could try sending status without + * Data-In, but we don't do that currently). */ - if ((cmd->state != QLA_TGT_STATE_NEED_DATA) && - (!cmd->aborted)) { - cmd->trc_flags |= TRC_CTIO_ERR; - if (qlt_term_ctio_exchange(qpair, ctio, cmd, status)) - return; - } + if (!cmd->sent_term_exchg && + (term_exchg || cmd->state != QLA_TGT_STATE_NEED_DATA)) + qlt_send_term_exchange(qpair, cmd, &cmd->atio, 1); } if (cmd->state == QLA_TGT_STATE_PROCESSED) { @@ -4164,7 +4144,6 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) goto out_term; } - spin_lock_init(&cmd->cmd_lock); cdb = &atio->u.isp24.fcp_cmnd.cdb[0]; cmd->se_cmd.tag = le32_to_cpu(atio->u.isp24.exchange_addr); @@ -4201,7 +4180,7 @@ static void __qlt_do_work(struct qla_tgt_cmd *cmd) */ cmd->trc_flags |= TRC_DO_WORK_ERR; spin_lock_irqsave(qpair->qp_lock_ptr, flags); - qlt_send_term_exchange(qpair, NULL, &cmd->atio, 1, 0); + qlt_send_term_exchange(qpair, NULL, &cmd->atio, 1); qlt_decr_num_pend_cmds(vha); cmd->vha->hw->tgt.tgt_ops->rel_cmd(cmd); @@ -5360,6 +5339,7 @@ static void qlt_handle_imm_notify(struct scsi_qla_host *vha, if (qlt_24xx_handle_els(vha, iocb) == 0) send_notify_ack = 0; break; + default: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf06d, "qla_target(%d): Received unknown immediate " @@ -5394,7 +5374,7 @@ static int __qlt_send_busy(struct qla_qpair *qpair, sess = qla2x00_find_fcport_by_nportid(vha, &id, 1); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); if (!sess) { - qlt_send_term_exchange(qpair, NULL, atio, 1, 0); + qlt_send_term_exchange(qpair, NULL, atio, 1); return 0; } /* Sending marker isn't necessary, since we called from ISR */ @@ -5623,7 +5603,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha, ql_dbg(ql_dbg_tgt, vha, 0xe05f, "qla_target: Unable to send command to target, sending TERM EXCHANGE for rsp\n"); qlt_send_term_exchange(ha->base_qpair, NULL, - atio, 1, 0); + atio, 1); break; case -EBUSY: ql_dbg(ql_dbg_tgt, vha, 0xe060, @@ -5830,7 +5810,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, ql_dbg(ql_dbg_tgt, vha, 0xe05f, "qla_target: Unable to send command to target, sending TERM EXCHANGE for rsp\n"); qlt_send_term_exchange(rsp->qpair, NULL, - atio, 1, 0); + atio, 1); break; case -EBUSY: ql_dbg(ql_dbg_tgt, vha, 0xe060, @@ -6720,7 +6700,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked) adjust_corrupted_atio(pkt); qlt_send_term_exchange(ha->base_qpair, NULL, pkt, - ha_locked, 0); + ha_locked); } else { qlt_24xx_atio_pkt_all_vps(vha, (struct atio_from_isp *)pkt, ha_locked); diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index c483966d0a84..eb15d8e9f79e 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -754,6 +754,7 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *); #define QLA_TGT_STATE_NEED_DATA 1 /* target needs data to continue */ #define QLA_TGT_STATE_DATA_IN 2 /* Data arrived + target processing */ #define QLA_TGT_STATE_PROCESSED 3 /* target done processing */ +#define QLA_TGT_STATE_DONE 4 /* cmd being freed */ /* ATIO task_codes field */ #define ATIO_SIMPLE_QUEUE 0 @@ -876,8 +877,6 @@ struct qla_tgt_cmd { /* Sense buffer that will be mapped into outgoing status */ unsigned char sense_buffer[TRANSPORT_SENSE_BUFFER]; - spinlock_t cmd_lock; - /* to save extra sess dereferences */ unsigned int conf_compl_supported:1; unsigned int sg_mapped:1; unsigned int write_data_transferred:1; @@ -887,13 +886,14 @@ struct qla_tgt_cmd { unsigned int cmd_in_wq:1; unsigned int edif:1; + /* Set if the exchange has been terminated. */ + unsigned int sent_term_exchg:1; + /* - * This variable may be set from outside the LIO and I/O completion - * callback functions. Do not declare this member variable as a - * bitfield to avoid a read-modify-write operation when this variable - * is set. + * Set if sent_term_exchg is set, or if the cmd was aborted by a TMR, + * or if some other error prevents normal processing of the command. */ - unsigned int aborted; + unsigned int aborted:1; struct scatterlist *sg; /* cmd data buffer SG vector */ int sg_cnt; /* SG segments count */ @@ -932,6 +932,7 @@ struct qla_tgt_cmd { #define DIF_BUNDL_DMA_VALID 1 uint16_t prot_flags; + unsigned long jiffies_at_term_exchg; uint64_t jiffies_at_alloc; uint64_t jiffies_at_free; @@ -1055,6 +1056,8 @@ extern void qlt_response_pkt_all_vps(struct scsi_qla_host *, struct rsp_que *, extern int qlt_rdy_to_xfer(struct qla_tgt_cmd *); extern int qlt_xmit_response(struct qla_tgt_cmd *, int, uint8_t); extern int qlt_abort_cmd(struct qla_tgt_cmd *); +void qlt_send_term_exchange(struct qla_qpair *qpair, + struct qla_tgt_cmd *cmd, struct atio_from_isp *atio, int ha_locked); extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *); extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *); extern void qlt_free_cmd(struct qla_tgt_cmd *cmd); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index ceaf1c7b1d17..169219fe47a2 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -303,6 +303,8 @@ static void tcm_qla2xxx_rel_cmd(struct qla_tgt_cmd *cmd) */ static void tcm_qla2xxx_free_cmd(struct qla_tgt_cmd *cmd) { + cmd->state = QLA_TGT_STATE_DONE; + cmd->qpair->tgt_counters.core_qla_free_cmd++; cmd->cmd_in_wq = 1; -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:38:56
|
Commit aefed3e5548f ("scsi: qla2xxx: target: Fix offline port handling and host reset handling") caused two problems: 1. Commands sent to FW, after chip reset got stuck and never freed as FW is not going to respond to them anymore. 2. BUG_ON(cmd->sg_mapped) in qlt_free_cmd(). Commit 26f9ce53817a ("scsi: qla2xxx: Fix missed DMA unmap for aborted commands") attempted to fix this, but introduced another bug under different circumstances when two different CPUs were racing to call qlt_unmap_sg() at the same time: BUG_ON(!valid_dma_direction(dir)) in dma_unmap_sg_attrs(). So revert "scsi: qla2xxx: Fix missed DMA unmap for aborted commands" and partially revert "scsi: qla2xxx: target: Fix offline port handling and host reset handling" at __qla2x00_abort_all_cmds. Fixes: aefed3e5548f ("scsi: qla2xxx: target: Fix offline port handling and host reset handling") Fixes: 26f9ce53817a ("scsi: qla2xxx: Fix missed DMA unmap for aborted commands") Signed-off-by: Tony Battersby <to...@cy...> Signed-off-by: Dmitry Bogdanov <d.b...@ya...> --- v1 -> v2: This patch is a new addition in v2. drivers/scsi/qla2xxx/qla_os.c | 20 ++++++++++++++++++-- drivers/scsi/qla2xxx/qla_target.c | 5 +---- drivers/scsi/qla2xxx/qla_target.h | 1 + 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index f0b77f13628d..739137ddfd68 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1875,10 +1875,26 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) continue; } cmd = (struct qla_tgt_cmd *)sp; - cmd->aborted = 1; + + if (cmd->sg_mapped) + qlt_unmap_sg(vha, cmd); + + if (cmd->state == QLA_TGT_STATE_NEED_DATA) { + cmd->aborted = 1; + cmd->write_data_transferred = 0; + cmd->state = QLA_TGT_STATE_DATA_IN; + ha->tgt.tgt_ops->handle_data(cmd); + } else { + ha->tgt.tgt_ops->free_cmd(cmd); + } break; case TYPE_TGT_TMCMD: - /* Skip task management functions. */ + /* + * Currently, only ABTS response gets on the + * outstanding_cmds[] + */ + ha->tgt.tgt_ops->free_mcmd( + (struct qla_tgt_mgmt_cmd *) sp); break; default: break; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index b700bfc642b3..2abdb8ce0302 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2447,7 +2447,7 @@ static int qlt_pci_map_calc_cnt(struct qla_tgt_prm *prm) return -1; } -static void qlt_unmap_sg(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd) +void qlt_unmap_sg(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd) { struct qla_hw_data *ha; struct qla_qpair *qpair; @@ -3795,9 +3795,6 @@ int qlt_abort_cmd(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&cmd->cmd_lock, flags); if (cmd->aborted) { - if (cmd->sg_mapped) - qlt_unmap_sg(vha, cmd); - spin_unlock_irqrestore(&cmd->cmd_lock, flags); /* * It's normal to see 2 calls in this path: diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 15a59c125c53..c483966d0a84 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -1058,6 +1058,7 @@ extern int qlt_abort_cmd(struct qla_tgt_cmd *); extern void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *); extern void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *); extern void qlt_free_cmd(struct qla_tgt_cmd *cmd); +extern void qlt_unmap_sg(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd); extern void qlt_async_event(uint16_t, struct scsi_qla_host *, uint16_t *); extern void qlt_enable_vha(struct scsi_qla_host *); extern void qlt_vport_create(struct scsi_qla_host *, struct qla_hw_data *); -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:37:40
|
(target mode) Properly set the nport_handle field of the terminate exchange message. Previously when this field was not set properly, the term exchange would fail when cmd_sent_to_fw == 1 but work when cmd_sent_to_fw == 0 (i.e. it would fail when the HW was actively transferring data or status for the cmd but work when the HW was idle). With this change, term exchange works in any cmd state, which now makes it possible to abort a command that is locked up in the HW. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes drivers/scsi/qla2xxx/qla_target.c | 52 ++++++++++++++++++------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 72c74f8f5375..b700bfc642b3 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3622,14 +3622,35 @@ static int __qlt_send_term_exchange(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd, struct atio_from_isp *atio) { - struct scsi_qla_host *vha = qpair->vha; struct ctio7_to_24xx *ctio24; - request_t *pkt; - int ret = 0; + struct scsi_qla_host *vha; + uint16_t loop_id; uint16_t temp; - if (cmd) + if (cmd) { vha = cmd->vha; + loop_id = cmd->loop_id; + } else { + port_id_t id = be_to_port_id(atio->u.isp24.fcp_hdr.s_id); + struct qla_hw_data *ha; + struct fc_port *sess; + unsigned long flags; + + vha = qpair->vha; + ha = vha->hw; + + /* + * CTIO7_NHANDLE_UNRECOGNIZED works when aborting an idle + * command but not when aborting a command with an active CTIO + * exchange. + */ + loop_id = CTIO7_NHANDLE_UNRECOGNIZED; + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + sess = qla2x00_find_fcport_by_nportid(vha, &id, 1); + if (sess) + loop_id = sess->loop_id; + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + } if (cmd) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xe009, @@ -3642,31 +3663,20 @@ static int __qlt_send_term_exchange(struct qla_qpair *qpair, vha->vp_idx, le32_to_cpu(atio->u.isp24.exchange_addr)); } - pkt = (request_t *)qla2x00_alloc_iocbs_ready(qpair, NULL); - if (pkt == NULL) { + ctio24 = qla2x00_alloc_iocbs_ready(qpair, NULL); + if (!ctio24) { ql_dbg(ql_dbg_tgt, vha, 0xe050, "qla_target(%d): %s failed: unable to allocate " "request packet\n", vha->vp_idx, __func__); return -ENOMEM; } - if (cmd != NULL) { - if (cmd->state < QLA_TGT_STATE_PROCESSED) { - ql_dbg(ql_dbg_tgt, vha, 0xe051, - "qla_target(%d): Terminating cmd %p with " - "incorrect state %d\n", vha->vp_idx, cmd, - cmd->state); - } else - ret = 1; - } - qpair->tgt_counters.num_term_xchg_sent++; - pkt->entry_count = 1; - pkt->handle = QLA_TGT_SKIP_HANDLE | CTIO_COMPLETION_HANDLE_MARK; - ctio24 = (struct ctio7_to_24xx *)pkt; ctio24->entry_type = CTIO_TYPE7; - ctio24->nport_handle = cpu_to_le16(CTIO7_NHANDLE_UNRECOGNIZED); + ctio24->entry_count = 1; + ctio24->handle = QLA_TGT_SKIP_HANDLE | CTIO_COMPLETION_HANDLE_MARK; + ctio24->nport_handle = cpu_to_le16(loop_id); ctio24->timeout = cpu_to_le16(QLA_TGT_TIMEOUT); ctio24->vp_index = vha->vp_idx; ctio24->initiator_id = be_id_to_le(atio->u.isp24.fcp_hdr.s_id); @@ -3683,7 +3693,7 @@ static int __qlt_send_term_exchange(struct qla_qpair *qpair, qpair->reqq_start_iocbs(qpair); else qla2x00_start_iocbs(vha, qpair->req); - return ret; + return 0; } static void qlt_send_term_exchange(struct qla_qpair *qpair, -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:35:30
|
(target mode) As far as I can tell, CONTINUE_TGT_IO_TYPE and CTIO_A64_TYPE are message types from non-FWI2 boards (older than ISP24xx), which are not supported by qla_target.c. Removing them makes it possible to turn a void * into the real type and avoid some typecasts. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes drivers/scsi/qla2xxx/qla_target.c | 32 +++++-------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 1e81582085e3..df5c9aac5617 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3913,7 +3913,8 @@ static void *qlt_ctio_to_cmd(struct scsi_qla_host *vha, * ha->hardware_lock supposed to be held on entry. Might drop it, then reaquire */ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, - struct rsp_que *rsp, uint32_t handle, uint32_t status, void *ctio) + struct rsp_que *rsp, uint32_t handle, uint32_t status, + struct ctio7_from_24xx *ctio) { struct qla_hw_data *ha = vha->hw; struct se_cmd *se_cmd; @@ -3934,11 +3935,8 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, if (cmd == NULL) return; - if ((le16_to_cpu(((struct ctio7_from_24xx *)ctio)->flags) & CTIO7_FLAGS_DATA_OUT) && - cmd->sess) { - qlt_chk_edif_rx_sa_delete_pending(vha, cmd->sess, - (struct ctio7_from_24xx *)ctio); - } + if ((le16_to_cpu(ctio->flags) & CTIO7_FLAGS_DATA_OUT) && cmd->sess) + qlt_chk_edif_rx_sa_delete_pending(vha, cmd->sess, ctio); se_cmd = &cmd->se_cmd; cmd->cmd_sent_to_fw = 0; @@ -4007,7 +4005,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, *((u64 *)&crc->actual_dif[0]), *((u64 *)&crc->expected_dif[0])); - qlt_handle_dif_error(qpair, cmd, ctio); + qlt_handle_dif_error(qpair, cmd, crc); return; } @@ -5816,26 +5814,6 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, } break; - case CONTINUE_TGT_IO_TYPE: - { - struct ctio_to_2xxx *entry = (struct ctio_to_2xxx *)pkt; - - qlt_do_ctio_completion(vha, rsp, entry->handle, - le16_to_cpu(entry->status)|(pkt->entry_status << 16), - entry); - break; - } - - case CTIO_A64_TYPE: - { - struct ctio_to_2xxx *entry = (struct ctio_to_2xxx *)pkt; - - qlt_do_ctio_completion(vha, rsp, entry->handle, - le16_to_cpu(entry->status)|(pkt->entry_status << 16), - entry); - break; - } - case IMMED_NOTIFY_TYPE: ql_dbg(ql_dbg_tgt, vha, 0xe035, "%s", "IMMED_NOTIFY\n"); qlt_handle_imm_notify(vha, (struct imm_ntfy_from_isp *)pkt); -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:34:24
|
If a mailbox command completes immediately after wait_for_completion_timeout() times out, ha->mbx_intr_comp could be left in an inconsistent state, causing the next mailbox command not to wait for the hardware. Fix by reinitializing the completion before use. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes drivers/scsi/qla2xxx/qla_mbx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 32eb0ce8b170..1f01576f044b 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -253,6 +253,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) /* Issue set host interrupt command to send cmd out. */ ha->flags.mbox_int = 0; clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); + reinit_completion(&ha->mbx_intr_comp); /* Unlock mbx registers and wait for interrupt */ ql_dbg(ql_dbg_mbx, vha, 0x100f, @@ -279,6 +280,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) "cmd=%x Timeout.\n", command); spin_lock_irqsave(&ha->hardware_lock, flags); clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags); + reinit_completion(&ha->mbx_intr_comp); spin_unlock_irqrestore(&ha->hardware_lock, flags); if (chip_reset != ha->chip_reset) { -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:32:14
|
When given the module parameter qlini_mode=exclusive, qla2xxx in initiator mode is initially unable to successfully send SCSI commands to devices it finds while scanning, resulting in an escalating series of resets until an adapter reset clears the issue. Fix by checking the active mode instead of the module parameter. Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes drivers/scsi/qla2xxx/qla_os.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index a52293972e10..f0b77f13628d 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3438,13 +3438,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ha->mqenable = 0; if (ha->mqenable) { - bool startit = false; - - if (QLA_TGT_MODE_ENABLED()) - startit = false; - - if (ql2x_ini_mode == QLA2XXX_INI_MODE_ENABLED) - startit = true; + bool startit = !!(host->active_mode & MODE_INITIATOR); /* Create start of day qpairs for Block MQ */ for (i = 0; i < ha->max_qpairs; i++) -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:30:40
|
This reverts commit 0367076b0817d5c75dfb83001ce7ce5c64d803a9. The commit being reverted added code to __qla2x00_abort_all_cmds() to call sp->done() without holding a spinlock. But unlike the older code below it, this new code failed to check sp->cmd_type and just assumed TYPE_SRB, which results in a jump to an invalid pointer in target-mode with TYPE_TGT_CMD: qla2xxx [0000:65:00.0]-d034:8: qla24xx_do_nack_work create sess success 0000000009f7a79b qla2xxx [0000:65:00.0]-5003:8: ISP System Error - mbx1=1ff5h mbx2=10h mbx3=0h mbx4=0h mbx5=191h mbx6=0h mbx7=0h. qla2xxx [0000:65:00.0]-d01e:8: -> fwdump no buffer qla2xxx [0000:65:00.0]-f03a:8: qla_target(0): System error async event 0x8002 occurred qla2xxx [0000:65:00.0]-00af:8: Performing ISP error recovery - ha=0000000058183fda. BUG: kernel NULL pointer dereference, address: 0000000000000000 PF: supervisor instruction fetch in kernel mode PF: error_code(0x0010) - not-present page PGD 0 P4D 0 Oops: 0010 [#1] SMP CPU: 2 PID: 9446 Comm: qla2xxx_8_dpc Tainted: G O 6.1.133 #1 Hardware name: Supermicro Super Server/X11SPL-F, BIOS 4.2 12/15/2023 RIP: 0010:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. RSP: 0018:ffffc90001f93dc8 EFLAGS: 00010206 RAX: 0000000000000282 RBX: 0000000000000355 RCX: ffff88810d16a000 RDX: ffff88810dbadaa8 RSI: 0000000000080000 RDI: ffff888169dc38c0 RBP: ffff888169dc38c0 R08: 0000000000000001 R09: 0000000000000045 R10: ffffffffa034bdf0 R11: 0000000000000000 R12: ffff88810800bb40 R13: 0000000000001aa8 R14: ffff888100136610 R15: ffff8881070f7400 FS: 0000000000000000(0000) GS:ffff88bf80080000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 000000010c8ff006 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> ? __die+0x4d/0x8b ? page_fault_oops+0x91/0x180 ? trace_buffer_unlock_commit_regs+0x38/0x1a0 ? exc_page_fault+0x391/0x5e0 ? asm_exc_page_fault+0x22/0x30 __qla2x00_abort_all_cmds+0xcb/0x3e0 [qla2xxx_scst] qla2x00_abort_all_cmds+0x50/0x70 [qla2xxx_scst] qla2x00_abort_isp_cleanup+0x3b7/0x4b0 [qla2xxx_scst] qla2x00_abort_isp+0xfd/0x860 [qla2xxx_scst] qla2x00_do_dpc+0x581/0xa40 [qla2xxx_scst] kthread+0xa8/0xd0 </TASK> Then commit 4475afa2646d ("scsi: qla2xxx: Complete command early within lock") added the spinlock back, because not having the lock caused a race and a crash. But qla2x00_abort_srb() in the switch below already checks for qla2x00_chip_is_down() and handles it the same way, so the code above the switch is now redundant and still buggy in target-mode. Remove it. Cc: st...@vg... Signed-off-by: Tony Battersby <to...@cy...> --- v1 -> v2: no changes drivers/scsi/qla2xxx/qla_os.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index d4b484c0fd9d..a52293972e10 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1862,12 +1862,6 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { - if (qla2x00_chip_is_down(vha)) { - req->outstanding_cmds[cnt] = NULL; - sp->done(sp, res); - continue; - } - switch (sp->cmd_type) { case TYPE_SRB: qla2x00_abort_srb(qp, sp, res, &flags); -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-29 14:28:52
|
v1 -> v2 - Add new patch "scsi: qla2xxx: clear cmds after chip reset" suggested by Dmitry Bogdanov. - Rename "scsi: qla2xxx: fix oops during cmd abort" to "scsi: qla2xxx: fix races with aborting commands" and make SCST reset the ISP on a HW timeout instead of unmapping DMA that might still be in use. - Fix "scsi: qla2xxx: fix TMR failure handling" to free mcmds properly for LIO. - In "scsi: qla2xxx: add back SRR support", detect more buggy HBA fw versions based on the fw release notes. - Shorten code comment in "scsi: qla2xxx: improve safety of cmd lookup by handle" and improve patch description. - Rebase other patches as needed. v1: https://lore.kernel.org/r/f89...@cy.../ This patch series improves the qla2xxx FC driver in target mode. I developed these patches using the out-of-tree SCST target-mode subsystem (https://scst.sourceforge.net/), although most of the improvements will also apply to the other target-mode subsystems such as the in-tree LIO. Unfortunately qla2xxx+LIO does not pass all of my tests, but my patches do not make it any worse (results below). These patches have been well-tested at my employer with qla2xxx+SCST in both initiator mode and target mode and with a variety of FC HBAs and initiators. Since SCST is out-of-tree, some of the patches have parts that apply in-tree and other parts that apply out-of-tree to SCST. I am going to include the out-of-tree SCST patches to provide additional context; feel free to ignore them if you are not interested. All patches apply to linux 6.17 and SCST 3.10 master branch. Summary of patches: - bugfixes - cleanups - improve handling of aborts and task management requests - improve log message - add back SLER / SRR support (removed in 2017) Some of these patches improve handling of aborts and task management requests. This is some of the testing that I did: Test 1: Use /dev/sg to queue random disk I/O with short timeouts; make sure cmds are aborted successfully. Test 2: Queue lots of disk I/O, then use "sg_reset -N -d /dev/sg" on initiator to reset logical unit. Test 3: Queue lots of disk I/O, then use "sg_reset -N -t /dev/sg" on initiator to reset target. Test 4: Queue lots of disk I/O, then use "sg_reset -N -b /dev/sg" on initiator to reset bus. Test 5: Queue lots of disk I/O, then use "sg_reset -N -H /dev/sg" on initiator to reset host. Test 6: Use fiber channel attenuator to trigger SRR during write/read/compare test; check data integrity. With my patches, SCST passes all of these tests. Results with in-tree LIO target-mode subsystem: Test 1: Seems to abort the same cmd multiple times (both qlt_24xx_retry_term_exchange() and __qlt_send_term_exchange()). But cmds get aborted, so give it a pass? Test 2: Seems to work; cmds are aborted. Test 3: Target reset doesn't seem to abort cmds, instead, a few seconds later: qla2xxx [0000:04:00.0]-f058:9: qla_target(0): tag 1314312, op 2a: CTIO with TIMEOUT status 0xb received (state 1, port 51:40:2e:c0:18:1d:9f:cc, LUN 0) Tests 4 and 5: The initiator is unable to log back in to the target; the following messages are repeated over and over on the target: qla2xxx [0000:04:00.0]-e01c:9: Sending TERM ELS CTIO (ha=00000000f8811390) qla2xxx [0000:04:00.0]-f097:9: Linking sess 000000008df5aba8 [0] wwn 51:40:2e:c0:18:1d:9f:cc with PLOGI ACK to wwn 51:40:2e:c0:18:1d:9f:cc s_id 00:00:01, ref=2 pla 00000000835a9271 link 0 Test 6: passes with my patches; SRR not supported previously. So qla2xxx+LIO seems a bit flaky when handling exceptions, but my patches do not make it any worse. Perhaps someone who is more familiar with LIO can look at the difference between LIO and SCST and figure out how to improve it. Tony Battersby https://www.cybernetics.com/ Tony Battersby (16): Revert "scsi: qla2xxx: Perform lockless command completion in abort path" scsi: qla2xxx: fix initiator mode with qlini_mode=exclusive scsi: qla2xxx: fix lost interrupts with qlini_mode=disabled scsi: qla2xxx: use reinit_completion on mbx_intr_comp scsi: qla2xxx: remove code for unsupported hardware scsi: qla2xxx: improve debug output for term exchange scsi: qla2xxx: fix term exchange when cmd_sent_to_fw == 1 scsi: qla2xxx: clear cmds after chip reset scsi: qla2xxx: fix races with aborting commands scsi: qla2xxx: improve checks in qlt_xmit_response / qlt_rdy_to_xfer scsi: qla2xxx: fix TMR failure handling scsi: qla2xxx: fix invalid memory access with big CDBs scsi: qla2xxx: add cmd->rsp_sent scsi: qla2xxx: improve cmd logging scsi: qla2xxx: add back SRR support scsi: qla2xxx: improve safety of cmd lookup by handle drivers/scsi/qla2xxx/qla_dbg.c | 3 +- drivers/scsi/qla2xxx/qla_def.h | 1 - drivers/scsi/qla2xxx/qla_gbl.h | 2 +- drivers/scsi/qla2xxx/qla_init.c | 1 + drivers/scsi/qla2xxx/qla_isr.c | 32 +- drivers/scsi/qla2xxx/qla_mbx.c | 2 + drivers/scsi/qla2xxx/qla_mid.c | 4 +- drivers/scsi/qla2xxx/qla_os.c | 35 +- drivers/scsi/qla2xxx/qla_target.c | 1775 +++++++++++++++++++++++----- drivers/scsi/qla2xxx/qla_target.h | 112 +- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 17 + 11 files changed, 1646 insertions(+), 338 deletions(-) base-commit: e5f0a698b34ed76002dc5cff3804a61c80233a7a -- 2.43.0 |
From: Tony B. <to...@cy...> - 2025-09-25 19:31:05
|
On 9/25/25 13:00, Tony Battersby wrote: > On 9/25/25 12:04, Tony Battersby wrote: >> On 9/25/25 11:30, Xose Vazquez Perez wrote: >>> On 9/25/25 2:49 PM, Xose Vazquez Perez wrote: >>> >>>> If you want to review the firmware changelog, mainly: FCD-1183 (FCD-371, ER147301), FCD-259, ER146998 >>>> (from 9.00.00 to 9.15.05 [06/10/25]): >>>> https://www.marvell.com/content/dam/marvell/en/drivers/2025-06-10-release/fw_release_notes/Fibre_Channel_Firmware_Release_Notes.pdf >>>> >>>> It's look like all 2{678}xx devices/chips are affected by this bug. >>>> Perhaps the Marvel crew could provide more information on this. >>> 267x, or older, is still on 8.08, so apparently it's free of this bug: >>> https://www.marvell.com/content/dam/marvell/en/drivers/release-matrix/release-matrix-qlogic-fc-sw-posting-by-release-matrix.pdf >>> >>> 2870 / 2770 : 9.15.06 FW >>> 2740 / 2760 / 269x : 9.15.01 FW >>> 267x : 8.08.231 FW >> I am still trying to figure out what macros to use to test for the >> affected HBAs. So far I have: >> >> if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) >> >> But all the ISP numbers are pretty confusing. I have a number of 8, 16, >> 32, and 64 Gbps HBAs lying around to test, but I am sure I don't have >> every possible model. >> >> There are a number of items under "Changes and Fixes from v9.08.00 to >> v9.09.00" that might be related to the problem that I was experiencing. >> For example FCD-1076 sounds similar except that the SRR problem was with >> the CTIO queue rather than the ATIO queue. I could expand the "bad >> firmware" versions to include v9.04.00 - v9.08.00, since v9.04.00 >> introduced ER147301 and v9.09.00 fixed FCD-1183. >> >> Thanks, >> Tony >> > This is what I found by checking the PCI IDs on some of my HBAs: > > not affected > QLE2672 16 Gbps ISP2031 8.08.231 FW > > affected > QLE2694 16 Gpbs ISP2071 (tested) > QLE2742 32 Gbps ISP2261 (not tested) > QLE2872 64 Gpbs ISP2281 (not tested) > > So the following check should cover them: > > if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) > > Tony > Here is the updated version of the function: /* * Return true if the HBA firmware version is known to have bugs that * prevent Sequence Level Error Recovery (SLER) / Sequence Retransmission * Request (SRR) from working. * * Some bad versions are based on testing and some are based on "Marvell Fibre * Channel Firmware Release Notes". */ static bool qlt_has_sler_fw_bug(struct qla_hw_data *ha) { bool has_sler_fw_bug = false; if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) { /* * In the fw release notes: * ER147301 was added to v9.05.00 causing SLER regressions * FCD-259 was fixed in v9.08.00 * FCD-371 was fixed in v9.08.00 * FCD-1183 was fixed in v9.09.00 * * QLE2694L (ISP2071) known bad firmware (tested): * 9.06.02 * 9.07.00 * 9.08.02 * SRRs trigger hundreds of bogus entries in the response * queue and various other problems. * * QLE2694L known good firmware (tested): * 8.08.05 * 9.09.00 * * Suspected bad firmware (not confirmed by testing): * v9.05.xx * * unknown firmware: * 9.00.00 - 9.04.xx */ if (ha->fw_major_version == 9 && ha->fw_minor_version >= 5 && ha->fw_minor_version <= 8) has_sler_fw_bug = true; } return has_sler_fw_bug; } |
From: Tony B. <to...@cy...> - 2025-09-25 17:01:00
|
On 9/25/25 12:04, Tony Battersby wrote: > On 9/25/25 11:30, Xose Vazquez Perez wrote: >> On 9/25/25 2:49 PM, Xose Vazquez Perez wrote: >> >>> If you want to review the firmware changelog, mainly: FCD-1183 (FCD-371, ER147301), FCD-259, ER146998 >>> (from 9.00.00 to 9.15.05 [06/10/25]): >>> https://www.marvell.com/content/dam/marvell/en/drivers/2025-06-10-release/fw_release_notes/Fibre_Channel_Firmware_Release_Notes.pdf >>> >>> It's look like all 2{678}xx devices/chips are affected by this bug. >>> Perhaps the Marvel crew could provide more information on this. >> 267x, or older, is still on 8.08, so apparently it's free of this bug: >> https://www.marvell.com/content/dam/marvell/en/drivers/release-matrix/release-matrix-qlogic-fc-sw-posting-by-release-matrix.pdf >> >> 2870 / 2770 : 9.15.06 FW >> 2740 / 2760 / 269x : 9.15.01 FW >> 267x : 8.08.231 FW > I am still trying to figure out what macros to use to test for the > affected HBAs. So far I have: > > if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) > > But all the ISP numbers are pretty confusing. I have a number of 8, 16, > 32, and 64 Gbps HBAs lying around to test, but I am sure I don't have > every possible model. > > There are a number of items under "Changes and Fixes from v9.08.00 to > v9.09.00" that might be related to the problem that I was experiencing. > For example FCD-1076 sounds similar except that the SRR problem was with > the CTIO queue rather than the ATIO queue. I could expand the "bad > firmware" versions to include v9.04.00 - v9.08.00, since v9.04.00 > introduced ER147301 and v9.09.00 fixed FCD-1183. > > Thanks, > Tony > This is what I found by checking the PCI IDs on some of my HBAs: not affected QLE2672 16 Gbps ISP2031 8.08.231 FW affected QLE2694 16 Gpbs ISP2071 (tested) QLE2742 32 Gbps ISP2261 (not tested) QLE2872 64 Gpbs ISP2281 (not tested) So the following check should cover them: if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) Tony |
From: Tony B. <to...@cy...> - 2025-09-25 16:04:54
|
On 9/25/25 11:30, Xose Vazquez Perez wrote: > On 9/25/25 2:49 PM, Xose Vazquez Perez wrote: > >> If you want to review the firmware changelog, mainly: FCD-1183 (FCD-371, ER147301), FCD-259, ER146998 >> (from 9.00.00 to 9.15.05 [06/10/25]): >> https://www.marvell.com/content/dam/marvell/en/drivers/2025-06-10-release/fw_release_notes/Fibre_Channel_Firmware_Release_Notes.pdf >> >> It's look like all 2{678}xx devices/chips are affected by this bug. >> Perhaps the Marvel crew could provide more information on this. > 267x, or older, is still on 8.08, so apparently it's free of this bug: > https://www.marvell.com/content/dam/marvell/en/drivers/release-matrix/release-matrix-qlogic-fc-sw-posting-by-release-matrix.pdf > > 2870 / 2770 : 9.15.06 FW > 2740 / 2760 / 269x : 9.15.01 FW > 267x : 8.08.231 FW I am still trying to figure out what macros to use to test for the affected HBAs. So far I have: if (IS_QLA27XX(ha) || IS_QLA28XX(ha)) But all the ISP numbers are pretty confusing. I have a number of 8, 16, 32, and 64 Gbps HBAs lying around to test, but I am sure I don't have every possible model. There are a number of items under "Changes and Fixes from v9.08.00 to v9.09.00" that might be related to the problem that I was experiencing. For example FCD-1076 sounds similar except that the SRR problem was with the CTIO queue rather than the ATIO queue. I could expand the "bad firmware" versions to include v9.04.00 - v9.08.00, since v9.04.00 introduced ER147301 and v9.09.00 fixed FCD-1183. Thanks, Tony |