linux1394-devel Mailing List for IEEE 1394 for Linux
Brought to you by:
aeb,
bencollins
You can subscribe to this list here.
2000 |
Jan
|
Feb
|
Mar
(39) |
Apr
(154) |
May
(172) |
Jun
(237) |
Jul
(127) |
Aug
(135) |
Sep
(193) |
Oct
(175) |
Nov
(173) |
Dec
(148) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2001 |
Jan
(161) |
Feb
(225) |
Mar
(193) |
Apr
(158) |
May
(179) |
Jun
(292) |
Jul
(146) |
Aug
(134) |
Sep
(185) |
Oct
(190) |
Nov
(149) |
Dec
(161) |
2002 |
Jan
(186) |
Feb
(236) |
Mar
(254) |
Apr
(207) |
May
(189) |
Jun
(182) |
Jul
(202) |
Aug
(155) |
Sep
(149) |
Oct
(449) |
Nov
(191) |
Dec
(108) |
2003 |
Jan
(174) |
Feb
(242) |
Mar
(243) |
Apr
(255) |
May
(202) |
Jun
(290) |
Jul
(237) |
Aug
(178) |
Sep
(101) |
Oct
(153) |
Nov
(144) |
Dec
(95) |
2004 |
Jan
(162) |
Feb
(278) |
Mar
(282) |
Apr
(152) |
May
(127) |
Jun
(138) |
Jul
(94) |
Aug
(63) |
Sep
(64) |
Oct
(150) |
Nov
(102) |
Dec
(197) |
2005 |
Jan
(102) |
Feb
(172) |
Mar
(89) |
Apr
(158) |
May
(139) |
Jun
(160) |
Jul
(288) |
Aug
(89) |
Sep
(201) |
Oct
(92) |
Nov
(190) |
Dec
(139) |
2006 |
Jan
(121) |
Feb
(204) |
Mar
(133) |
Apr
(134) |
May
(91) |
Jun
(226) |
Jul
(122) |
Aug
(101) |
Sep
(144) |
Oct
(141) |
Nov
|
Dec
|
2023 |
Jan
(19) |
Feb
(1) |
Mar
(5) |
Apr
(5) |
May
(33) |
Jun
(17) |
Jul
|
Aug
|
Sep
(3) |
Oct
(1) |
Nov
(5) |
Dec
(40) |
2024 |
Jan
(26) |
Feb
(14) |
Mar
(26) |
Apr
(46) |
May
(17) |
Jun
(47) |
Jul
(23) |
Aug
(72) |
Sep
(42) |
Oct
(6) |
Nov
(2) |
Dec
(1) |
2025 |
Jan
(2) |
Feb
(1) |
Mar
(4) |
Apr
(2) |
May
|
Jun
(25) |
Jul
(12) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: Takashi S. <o-t...@sa...> - 2025-07-28 01:51:45
|
The previous commit added reference counting to ensure safe invocations of address handlers. This commit moves the invocation of FCP address handlers outside of the RCU read critical section. Unlike the exclusive-region address handlers, all FCP address handlers should be called on receiving an FCP request. An XArray is used to collect the FCP address handlers during the RCU read-side critical section, after which they are invoked. Reference counting ensures that each FCP address handler is called safely. Signed-off-by: Takashi Sakamoto <o-t...@sa...> --- drivers/firewire/core-transaction.c | 30 +++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index a742971c65fa..c5408c83709c 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -956,13 +956,14 @@ static void handle_fcp_region_request(struct fw_card *card, unsigned long long offset) { struct fw_address_handler *handler; + DEFINE_XARRAY_ALLOC(handlers); int tcode, destination, source; + unsigned long id; if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) || request->length > 0x200) { fw_send_response(card, request, RCODE_ADDRESS_ERROR); - return; } @@ -973,22 +974,39 @@ static void handle_fcp_region_request(struct fw_card *card, if (tcode != TCODE_WRITE_QUADLET_REQUEST && tcode != TCODE_WRITE_BLOCK_REQUEST) { fw_send_response(card, request, RCODE_TYPE_ERROR); + return; + } + // Reserve an entry outside the RCU read-side critical section to cover most cases. 
+ id = 0; + if (xa_reserve(&handlers, id, GFP_KERNEL) < 0) { + fw_send_response(card, request, RCODE_CONFLICT_ERROR); return; } scoped_guard(rcu) { list_for_each_entry_rcu(handler, &address_handler_list, link) { if (is_enclosing_handler(handler, offset, request->length)) { - get_address_handler(handler); - handler->address_callback(card, request, tcode, destination, source, - p->generation, offset, request->data, - request->length, handler->callback_data); - put_address_handler(handler); + // FCP is used for purposes unrelated to significant system + // resources (e.g. storage or networking), so allocation + // failures are not considered so critical. + void *ptr = xa_store(&handlers, id, handler, GFP_ATOMIC); + if (!xa_is_err(ptr)) { + ++id; + get_address_handler(handler); + } } } } + xa_for_each(&handlers, id, handler) { + // Outside the RCU read-side critical section. Without spinlock. With reference count. + handler->address_callback(card, request, tcode, destination, source, p->generation, + offset, request->data, request->length, handler->callback_data); + put_address_handler(handler); + } + + xa_destroy(&handlers); fw_send_response(card, request, RCODE_COMPLETE); } -- 2.48.1 |
From: Takashi S. <o-t...@sa...> - 2025-07-28 01:51:44
|
The previous commit added reference counting to ensure safe invocations of address handlers. This commit moves the invocation of handlers for exclusive regions outside of the RCU read-side critical section. The address handler for the requested region is selected within the critical section, then invoked outside of it. Signed-off-by: Takashi Sakamoto <o-t...@sa...> --- drivers/firewire/core-transaction.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 29ca9f3f14ce..a742971c65fa 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -935,17 +935,19 @@ static void handle_exclusive_region_request(struct fw_card *card, scoped_guard(rcu) { handler = lookup_enclosing_address_handler(&address_handler_list, offset, request->length); - if (handler) { + if (handler) get_address_handler(handler); - handler->address_callback(card, request, tcode, destination, source, - p->generation, offset, request->data, - request->length, handler->callback_data); - put_address_handler(handler); - } } - if (!handler) + if (!handler) { fw_send_response(card, request, RCODE_ADDRESS_ERROR); + return; + } + + // Outside the RCU read-side critical section. Without spinlock. With reference count. + handler->address_callback(card, request, tcode, destination, source, p->generation, offset, + request->data, request->length, handler->callback_data); + put_address_handler(handler); } static void handle_fcp_region_request(struct fw_card *card, -- 2.48.1 |
From: Takashi S. <o-t...@sa...> - 2025-07-28 01:51:43
|
The lifetime of address handlers has been managed by a linked list and RCU. This approach was introduced in commit 35202f7d8420 ("firewire: remove global lock around address handlers, convert to RCU"). The invocations of address handlers are performed within RCU read-side critical sections. Since commit 57e6d9f85fff ("firewire: ohci: use workqueue to handle events of AR request/response contexts"), the invocations are in a workqueue context. However, since sleeping is not permitted within RCU read-side critical sections, this approach still imposes the limitation that the handlers must not sleep. This commit adds reference counting to decouple handler invocation from handler discovery. The linked list and RCU are used to discover the handlers, while the reference counting is used to invoke them safely. Signed-off-by: Takashi Sakamoto <o-t...@sa...> --- drivers/firewire/core-transaction.c | 32 +++++++++++++++++++++++++++-- include/linux/firewire.h | 4 ++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index d28477d84697..29ca9f3f14ce 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -550,6 +550,23 @@ const struct fw_address_region fw_unit_space_region = { .start = 0xfffff0000900ULL, .end = 0x1000000000000ULL, }; #endif /* 0 */ +static void complete_address_handler(struct kref *kref) +{ + struct fw_address_handler *handler = container_of(kref, struct fw_address_handler, kref); + + complete(&handler->done); +} + +static void get_address_handler(struct fw_address_handler *handler) +{ + kref_get(&handler->kref); +} + +static int put_address_handler(struct fw_address_handler *handler) +{ + return kref_put(&handler->kref, complete_address_handler); +} + /** * fw_core_add_address_handler() - register for incoming requests * @handler: callback @@ -596,6 +613,8 @@ int 
fw_core_add_address_handler(struct fw_address_handler *handler, if (other != NULL) { handler->offset += other->length; } else { + init_completion(&handler->done); + kref_init(&handler->kref); list_add_tail_rcu(&handler->link, &address_handler_list); ret = 0; break; @@ -621,6 +640,9 @@ void fw_core_remove_address_handler(struct fw_address_handler *handler) list_del_rcu(&handler->link); synchronize_rcu(); + + if (!put_address_handler(handler)) + wait_for_completion(&handler->done); } EXPORT_SYMBOL(fw_core_remove_address_handler); @@ -913,10 +935,13 @@ static void handle_exclusive_region_request(struct fw_card *card, scoped_guard(rcu) { handler = lookup_enclosing_address_handler(&address_handler_list, offset, request->length); - if (handler) + if (handler) { + get_address_handler(handler); handler->address_callback(card, request, tcode, destination, source, p->generation, offset, request->data, request->length, handler->callback_data); + put_address_handler(handler); + } } if (!handler) @@ -952,10 +977,13 @@ static void handle_fcp_region_request(struct fw_card *card, scoped_guard(rcu) { list_for_each_entry_rcu(handler, &address_handler_list, link) { - if (is_enclosing_handler(handler, offset, request->length)) + if (is_enclosing_handler(handler, offset, request->length)) { + get_address_handler(handler); handler->address_callback(card, request, tcode, destination, source, p->generation, offset, request->data, request->length, handler->callback_data); + put_address_handler(handler); + } } } diff --git a/include/linux/firewire.h b/include/linux/firewire.h index cceb70415ed2..d38c6e538e5c 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -341,7 +341,11 @@ struct fw_address_handler { u64 length; fw_address_callback_t address_callback; void *callback_data; + + // Only for core functions. struct list_head link; + struct kref kref; + struct completion done; }; struct fw_address_region { -- 2.48.1 |
From: Takashi S. <o-t...@sa...> - 2025-07-28 01:51:38
|
Hi, After applying the patch that replaces tasklet usage with workqueue [1], I realized there is a corner case where schedule() is called within an RCU read-side critical section when handling an asynchronous transaction from the local node to itself. The mechanism is: fw_send_request() ->ohci_send_request() ->at_context_transmit() ->handle_local_request() [to local node] ... ->fw_core_handle_request() ->handle_{exclusive,fcp}_region_request() [under RCU read locking] ->fw_send_response() ->ohci_send_response() ->at_context_transmit() ->handle_local_request() [to local node] ... ->fw_core_handle_response() ->ohci_cancel_packet() [for request packet] ->disable_work_sync() ->schedule() Previously, when cancelling the request subaction of a transaction, 'tasklet_disable_in_atomic()' was used. However, this function is now deprecated, as you know. It has been replaced with disable_work_sync() which requires that the workqueue be allowed to sleep when invoking the address handler. The purpose of this patch series is to move the invocation of address handlers outside the RCU read-side critical section. Reference counting is newly added to ensure safe invocation outside the section. [1] https://lore.kernel.org/lkml/202...@sa.../ Takashi Sakamoto (3): firewire: core: use reference counting to invoke address handlers safely firewire: core: call handler for exclusive regions outside RCU read-side critical section firewire: core: call FCP address handlers outside RCU read-side critical section drivers/firewire/core-transaction.c | 66 +++++++++++++++++++++++++---- include/linux/firewire.h | 4 ++ 2 files changed, 61 insertions(+), 9 deletions(-) Regards Takashi Sakamoto -- 2.48.1 |
From: AreYouLoco? <are...@pa...> - 2025-07-14 16:50:52
|
So, looking at your previous mail output, Takashi-san: Capabilities: [4c] MSI: Enable- So it seems that in your case quirk=0x10 is applied, which disables the MSI capability. That was found in the other thread to be a coherent solution to all the mentioned problems that I and other users had. Even hot-plug works correctly with that quirk. Did you apply the quirk yourself manually in the config and forget about it, or did the kernel apply it by itself? Is it really applied in your case? Looking at the code in master, the quirk is supposed to be applied only to FW643 hw rev6 or pci id 6 or some sort. And I believe it is needed in general for that chip. (So ANY_PCI_ID as other quirks are set for other chipsets.) Possibly it is needed for other LSI (Agere) chips as well, not only the FW643. So for me it's solved for now, but I am just looking for a proper solution upstream. On June 20, 2025 1:12:49 PM UTC, Takashi Sakamoto <o-t...@sa...> wrote: >Hi, > >On Fri, Jun 20, 2025 at 08:52:42AM +0930, Jonathan Woithe wrote: >> Alternatively, are there any FW643 users who have no problem with device >> detection under a 6.12.x (or later) kernel? If so this would conclusively >> rule out a kernel issue. > >I can not regenerate the issue with the kernel modules provided by the >stock kernel in Ubuntu 25.04 amd64 (v6.14). The machine consists of > >* ASUSTeK COMPUTER INC. 
TUF GAMING X570-PLUS >* AMD Ryzen™ 7 5700G with Radeon™ Graphics × 16 > > >``` >$ dpkg -S /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/* >linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-core.ko.zst >linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-net.ko.zst >linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-ohci.ko.zst >linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-sbp2.ko.zst >linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/nosy.ko.zst > >$ sudo lspci -vvnn >06:00.0 FireWire (IEEE 1394) [0c00]: LSI Corporation FW643 [TrueFire] PCIe 1394b Controller [11c1:5901] (rev 06) (prog-if 10 [OHCI]) > Subsystem: LSI Corporation FW643 [TrueFire] PCIe 1394b Controller [11c1:5900] > Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- > Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx- > Latency: 0, Cache Line Size: 64 bytes > Interrupt: pin A routed to IRQ 40 > IOMMU group: 20 > Region 0: Memory at fc600000 (64-bit, non-prefetchable) [size=4K] > Capabilities: [44] Power Management version 3 > Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA PME(D0+,D1+,D2+,D3hot+,D3cold+) > Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=0 PME+ > Capabilities: [4c] MSI: Enable- Count=1/1 Maskable- 64bit+ > Address: 0000000000000000 Data: 0000 > Capabilities: [60] Express (v1) Endpoint, IntMsgNum 0 > DevCap: MaxPayload 256 bytes, PhantFunc 0, Latency L0s <4us, L1 <64us > ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset- SlotPowerLimit 0W TEE-IO- > DevCtl: CorrErr+ NonFatalErr+ FatalErr+ UnsupReq+ > RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop- > MaxPayload 128 bytes, MaxReadReq 512 bytes > DevSta: CorrErr- NonFatalErr- FatalErr- 
UnsupReq- AuxPwr+ TransPend- > LnkCap: Port #0, Speed 2.5GT/s, Width x1, ASPM L0s L1, Exit Latency L0s <512ns, L1 <64us > ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp- > LnkCtl: ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk+ > ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt- > LnkSta: Speed 2.5GT/s, Width x1 > TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt- > Capabilities: [100 v1] Advanced Error Reporting > UESta: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- > ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- > PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked- > UEMsk: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- > ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- > PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked- > UESvrt: DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+ > ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- > PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked- > CESta: RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr- CorrIntErr- HeaderOF- > CEMsk: RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF- > AERCap: First Error Pointer: 00, ECRCGenCap+ ECRCGenEn- ECRCChkCap+ ECRCChkEn- > MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap- > HeaderLog: 00000000 00000000 00000000 00000000 > Capabilities: [140 v1] Virtual Channel > Caps: LPEVC=0 RefClk=100ns PATEntryBits=1 > Arb: Fixed- WRR32- WRR64- WRR128- > Ctrl: ArbSelect=Fixed > Status: InProgress- > VC0: Caps: PATOffset=00 MaxTimeSlots=1 RejSnoopTrans- > Arb: Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256- > Ctrl: Enable+ ID=0 ArbSelect=Fixed TC/VC=01 > Status: NegoPending- InProgress- > VC1: Caps: PATOffset=00 MaxTimeSlots=1 RejSnoopTrans- > Arb: Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256- > Ctrl: Enable- 
ID=1 ArbSelect=Fixed TC/VC=00 > Status: NegoPending- InProgress- > Capabilities: [170 v1] Device Serial Number 12-34-56-10-12-30-00-86 > Kernel driver in use: firewire_ohci > Kernel modules: firewire_ohci > >``` > >My Phonic FireFly 808 Universal is detected successfully even after the >reported steps. > > >Thanks > >Takashi Sakamoto > > >_______________________________________________ >mailing list lin...@li... >https://lists.sourceforge.net/lists/listinfo/linux1394-devel |
From: Takashi S. <o-t...@sa...> - 2025-07-12 13:00:31
|
On Thu, Jul 10, 2025 at 10:19:16PM +0900, Takashi Sakamoto wrote: > In commit 386a4153a2c1 ("firewire: ohci: cache the context run bit"), a > running member was added to the context structure to cache the running > state of a given DMA context. Although this member is accessible from IR, > IT, and AT contexts, it is currently used only by the AT context. > > Additionally, the context structure includes a work item, which is also > used by the AT context. Both members are unnecessary for IR and IT > contexts. > > This commit refactors the code by moving these two members into a new > structure specific to AT context. > > Signed-off-by: Takashi Sakamoto <o-t...@sa...> > --- > drivers/firewire/ohci.c | 92 ++++++++++++++++++++++------------------- > 1 file changed, 49 insertions(+), 43 deletions(-) Applied to for-next branch. Thanks Takashi Sakamoto |
From: Takashi S. <o-t...@sa...> - 2025-07-10 13:19:27
|
In commit 386a4153a2c1 ("firewire: ohci: cache the context run bit"), a running member was added to the context structure to cache the running state of a given DMA context. Although this member is accessible from IR, IT, and AT contexts, it is currently used only by the AT context. Additionally, the context structure includes a work item, which is also used by the AT context. Both members are unnecessary for IR and IT contexts. This commit refactors the code by moving these two members into a new structure specific to AT context. Signed-off-by: Takashi Sakamoto <o-t...@sa...> --- drivers/firewire/ohci.c | 92 ++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 709a714fd5c8..5d8301b0f3aa 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -128,7 +128,6 @@ struct context { int total_allocation; u32 current_bus; bool running; - bool flushing; /* * List of page-sized buffers for storing DMA descriptors. 
@@ -157,8 +156,12 @@ struct context { int prev_z; descriptor_callback_t callback; +}; +struct at_context { + struct context context; struct work_struct work; + bool flushing; }; struct iso_context { @@ -204,8 +207,8 @@ struct fw_ohci { struct ar_context ar_request_ctx; struct ar_context ar_response_ctx; - struct context at_request_ctx; - struct context at_response_ctx; + struct at_context at_request_ctx; + struct at_context at_response_ctx; u32 it_context_support; u32 it_context_mask; /* unoccupied IT contexts */ @@ -1178,9 +1181,9 @@ static void context_retire_descriptors(struct context *ctx) static void ohci_at_context_work(struct work_struct *work) { - struct context *ctx = from_work(ctx, work, work); + struct at_context *ctx = from_work(ctx, work, work); - context_retire_descriptors(ctx); + context_retire_descriptors(&ctx->context); } static void ohci_isoc_context_work(struct work_struct *work) @@ -1382,17 +1385,17 @@ struct driver_data { * Must always be called with the ochi->lock held to ensure proper * generation handling and locking around packet queue manipulation. 
*/ -static int at_context_queue_packet(struct context *ctx, - struct fw_packet *packet) +static int at_context_queue_packet(struct at_context *ctx, struct fw_packet *packet) { - struct fw_ohci *ohci = ctx->ohci; + struct context *context = &ctx->context; + struct fw_ohci *ohci = context->ohci; dma_addr_t d_bus, payload_bus; struct driver_data *driver_data; struct descriptor *d, *last; __le32 *header; int z, tcode; - d = context_get_descriptors(ctx, 4, &d_bus); + d = context_get_descriptors(context, 4, &d_bus); if (d == NULL) { packet->ack = RCODE_SEND_ERROR; return -1; @@ -1422,7 +1425,7 @@ static int at_context_queue_packet(struct context *ctx, ohci1394_at_data_set_destination_id(header, async_header_get_destination(packet->header)); - if (ctx == &ctx->ohci->at_response_ctx) { + if (ctx == &ohci->at_response_ctx) { ohci1394_at_data_set_rcode(header, async_header_get_rcode(packet->header)); } else { ohci1394_at_data_set_destination_offset(header, @@ -1511,17 +1514,17 @@ static int at_context_queue_packet(struct context *ctx, return -1; } - context_append(ctx, d, z, 4 - z); + context_append(context, d, z, 4 - z); - if (ctx->running) - reg_write(ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE); + if (context->running) + reg_write(ohci, CONTROL_SET(context->regs), CONTEXT_WAKE); else - context_run(ctx, 0); + context_run(context, 0); return 0; } -static void at_context_flush(struct context *ctx) +static void at_context_flush(struct at_context *ctx) { // Avoid dead lock due to programming mistake. 
if (WARN_ON_ONCE(current_work() == &ctx->work)) @@ -1540,12 +1543,13 @@ static int handle_at_packet(struct context *context, struct descriptor *d, struct descriptor *last) { + struct at_context *ctx = container_of(context, struct at_context, context); + struct fw_ohci *ohci = ctx->context.ohci; struct driver_data *driver_data; struct fw_packet *packet; - struct fw_ohci *ohci = context->ohci; int evt; - if (last->transfer_status == 0 && !READ_ONCE(context->flushing)) + if (last->transfer_status == 0 && !READ_ONCE(ctx->flushing)) /* This descriptor isn't done yet, stop iteration. */ return 0; @@ -1579,7 +1583,7 @@ static int handle_at_packet(struct context *context, break; case OHCI1394_evt_missing_ack: - if (READ_ONCE(context->flushing)) + if (READ_ONCE(ctx->flushing)) packet->ack = RCODE_GENERATION; else { /* @@ -1601,7 +1605,7 @@ static int handle_at_packet(struct context *context, break; case OHCI1394_evt_no_status: - if (READ_ONCE(context->flushing)) { + if (READ_ONCE(ctx->flushing)) { packet->ack = RCODE_GENERATION; break; } @@ -1698,13 +1702,14 @@ static void handle_local_lock(struct fw_ohci *ohci, fw_core_handle_response(&ohci->card, &response); } -static void handle_local_request(struct context *ctx, struct fw_packet *packet) +static void handle_local_request(struct at_context *ctx, struct fw_packet *packet) { + struct fw_ohci *ohci = ctx->context.ohci; u64 offset, csr; - if (ctx == &ctx->ohci->at_request_ctx) { + if (ctx == &ohci->at_request_ctx) { packet->ack = ACK_PENDING; - packet->callback(packet, &ctx->ohci->card, packet->ack); + packet->callback(packet, &ohci->card, packet->ack); } offset = async_header_get_offset(packet->header); @@ -1712,54 +1717,55 @@ static void handle_local_request(struct context *ctx, struct fw_packet *packet) /* Handle config rom reads. 
*/ if (csr >= CSR_CONFIG_ROM && csr < CSR_CONFIG_ROM_END) - handle_local_rom(ctx->ohci, packet, csr); + handle_local_rom(ohci, packet, csr); else switch (csr) { case CSR_BUS_MANAGER_ID: case CSR_BANDWIDTH_AVAILABLE: case CSR_CHANNELS_AVAILABLE_HI: case CSR_CHANNELS_AVAILABLE_LO: - handle_local_lock(ctx->ohci, packet, csr); + handle_local_lock(ohci, packet, csr); break; default: - if (ctx == &ctx->ohci->at_request_ctx) - fw_core_handle_request(&ctx->ohci->card, packet); + if (ctx == &ohci->at_request_ctx) + fw_core_handle_request(&ohci->card, packet); else - fw_core_handle_response(&ctx->ohci->card, packet); + fw_core_handle_response(&ohci->card, packet); break; } - if (ctx == &ctx->ohci->at_response_ctx) { + if (ctx == &ohci->at_response_ctx) { packet->ack = ACK_COMPLETE; - packet->callback(packet, &ctx->ohci->card, packet->ack); + packet->callback(packet, &ohci->card, packet->ack); } } -static void at_context_transmit(struct context *ctx, struct fw_packet *packet) +static void at_context_transmit(struct at_context *ctx, struct fw_packet *packet) { + struct fw_ohci *ohci = ctx->context.ohci; unsigned long flags; int ret; - spin_lock_irqsave(&ctx->ohci->lock, flags); + spin_lock_irqsave(&ohci->lock, flags); - if (async_header_get_destination(packet->header) == ctx->ohci->node_id && - ctx->ohci->generation == packet->generation) { - spin_unlock_irqrestore(&ctx->ohci->lock, flags); + if (async_header_get_destination(packet->header) == ohci->node_id && + ohci->generation == packet->generation) { + spin_unlock_irqrestore(&ohci->lock, flags); // Timestamping on behalf of the hardware. - packet->timestamp = cycle_time_to_ohci_tstamp(get_cycle_time(ctx->ohci)); + packet->timestamp = cycle_time_to_ohci_tstamp(get_cycle_time(ohci)); handle_local_request(ctx, packet); return; } ret = at_context_queue_packet(ctx, packet); - spin_unlock_irqrestore(&ctx->ohci->lock, flags); + spin_unlock_irqrestore(&ohci->lock, flags); if (ret < 0) { // Timestamping on behalf of the hardware. 
- packet->timestamp = cycle_time_to_ohci_tstamp(get_cycle_time(ctx->ohci)); + packet->timestamp = cycle_time_to_ohci_tstamp(get_cycle_time(ohci)); - packet->callback(packet, &ctx->ohci->card, packet->ack); + packet->callback(packet, &ohci->card, packet->ack); } } @@ -2138,8 +2144,8 @@ static void bus_reset_work(struct work_struct *work) // FIXME: Document how the locking works. scoped_guard(spinlock_irq, &ohci->lock) { ohci->generation = -1; // prevent AT packet queueing - context_stop(&ohci->at_request_ctx); - context_stop(&ohci->at_response_ctx); + context_stop(&ohci->at_request_ctx.context); + context_stop(&ohci->at_response_ctx.context); } /* @@ -2683,7 +2689,7 @@ static void ohci_send_response(struct fw_card *card, struct fw_packet *packet) static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet) { struct fw_ohci *ohci = fw_ohci(card); - struct context *ctx = &ohci->at_request_ctx; + struct at_context *ctx = &ohci->at_request_ctx; struct driver_data *driver_data = packet->driver_data; int ret = -ENOENT; @@ -3767,13 +3773,13 @@ static int pci_probe(struct pci_dev *dev, if (err < 0) return err; - err = context_init(&ohci->at_request_ctx, ohci, + err = context_init(&ohci->at_request_ctx.context, ohci, OHCI1394_AsReqTrContextControlSet, handle_at_packet); if (err < 0) return err; INIT_WORK(&ohci->at_request_ctx.work, ohci_at_context_work); - err = context_init(&ohci->at_response_ctx, ohci, + err = context_init(&ohci->at_response_ctx.context, ohci, OHCI1394_AsRspTrContextControlSet, handle_at_packet); if (err < 0) return err; -- 2.48.1 |
From: Takashi S. <o-t...@sa...> - 2025-07-10 12:52:43
|
Hi, On Thu, Jul 10, 2025 at 01:09:06PM +0530, Purva Yeshi wrote: > Fix Smatch-detected error: > drivers/firewire/ohci.c:1514 at_context_queue_packet() > error: uninitialized symbol 'payload_bus'. > > Smatch reports a potential uninitialized use of 'payload_bus' in > at_context_queue_packet(). If packet->payload_length is zero, the > variable may not be set before reaching the dma_unmap_single() call, > which could lead to undefined behavior. > > Initialize 'payload_bus' to 0 to ensure it has a defined value in all > code paths, preventing any uninitialized access. > > Signed-off-by: Purva Yeshi <pur...@gm...> In my opinion, we should pay close attention to the value of the 'packet->payload_mapped' variable when considering the issue. ``` $ cat -n drivers/firewire/ohci.c ... 1385 static int at_context_queue_packet(struct context *ctx, 1386 struct fw_packet *packet) 1387 { 1388 struct fw_ohci *ohci = ctx->ohci; 1389 dma_addr_t d_bus, payload_bus; ... 1474 if (packet->payload_length > 0) { 1475 if (packet->payload_length > sizeof(driver_data->inline_data)) { 1476 payload_bus = dma_map_single(ohci->card.device, ... 1485 packet->payload_mapped = true; 1486 } else { ... 1489 payload_bus = d_bus + 3 * sizeof(*d); 1490 } ... 1496 } else { ... 1499 } ... 1506 if (ohci->generation != packet->generation) { 1507 if (packet->payload_mapped) 1508 dma_unmap_single(ohci->card.device, payload_bus, 1509 packet->payload_length, DMA_TO_DEVICE); ... 1512 } ``` Unless that variable is true, dma_unmap_single() is never called; thus the issue does not occur. Thanks Takashi Sakamoto |
From: AreYouLoco? <are...@pa...> - 2025-07-06 11:06:19
|
I think I got it! I got interested in firewire-ohci quirks and my dmesg reported 0x0 for quirks by default. And when reading through: <https://github.com/torvalds/linux/blob/1f988d0788f50d8464f957e793fab356e2937369/drivers/firewire/ohci.c#L330> It seems that it should apply 0x4 and 0x10 so 0x14. I've created: /etc/modprobe.d/firewire-ohci-quirks.conf options firewire-ohci quirks=0x14 And now it seems to work fine each time. And also hot-plug started to work! So the issue is that the quirks that are supposed to be applied are, for some reason, not applied. Could someone verify this on their end?! On July 5, 2025 4:11:06 PM UTC, AreYouLoco? via linux1394-devel <lin...@li...> wrote: >Just to note I am on exact same kernel version. > >Takashi さん tried to reproduce it on 6.14 and couldn't. > >I think I am going to try some cutting edge distro like Arch with newer kernel and see there. Kernel 6.15 from experimental Debian repo didnt boot for me. > >There were reports and from my own experience that it worked before on older kernels presumably 6.5.something. > > >On July 3, 2025 4:58:59 AM UTC, "小太" <no...@ko...e> wrote: >>On Thu, 3 Jul 2025 at 04:43, AreYouLoco? <are...@pa...> wrote: >> >>> Hi, >>> >>> I am observing similar problems reported already to the list also with >>> Agere but with FW643 and FW643E chipsets. >>> >>> Similarity here is OPs probes with unloading firewire-ohci. I am doing >>> same attempts to restore detection of devices. And truely I couldn't find a >>> pattern what combination works. And sometimes modprobe -r firewire-ohci >>> hangs for me as well and also normal reboot in that case hangs as well. >>> >>> I just do 10seconds power button then. >>> >>> So there seem to be some pattern with Agere chipsets not cooperating. >>> >>> May I ask OP: which kernel version are you on? 
>>> >> >>It's been happening for at least half a year now so it's not a recent issue >>by any means - I've just been too lazy to debug it given how infrequently >>it occurs for me >> >>But my current kernel version is 6.12.32 >> >>$ uname -a >>Linux home.kota.moe 6.12.32-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.12.32-1 >>(2025-06-07) x86_64 GNU/Linux |
From: AreYouLoco? <are...@pa...> - 2025-07-05 16:11:15
|
Just to note, I am on the exact same kernel version. Takashi さん tried to reproduce it on 6.14 and couldn't. I think I am going to try some cutting-edge distro like Arch with a newer kernel and see what happens there. Kernel 6.15 from the experimental Debian repo didn't boot for me. There were reports, and it is also my own experience, that it worked before on older kernels, presumably 6.5.something. On July 3, 2025 4:58:59 AM UTC, "小太" <no...@ko...e> wrote: >On Thu, 3 Jul 2025 at 04:43, AreYouLoco? <are...@pa...> wrote: > >> Hi, >> >> I am observing similar problems reported already to the list also with >> Agere but with FW643 and FW643E chipsets. >> >> Similarity here is OPs probes with unloading firewire-ohci. I am doing >> same attempts to restore detection of devices. And truely I couldn't find a >> pattern what combination works. And sometimes modprobe -r firewire-ohci >> hangs for me as well and also normal reboot in that case hangs as well. >> >> I just do 10seconds power button then. >> >> So there seem to be some pattern with Agere chipsets not cooperating. >> >> May I ask OP: which kernel version are you on? >> > >It's been happening for at least half a year now so it's not a recent issue >by any means - I've just been too lazy to debug it given how infrequently >it occurs for me > >But my current kernel version is 6.12.32 > >$ uname -a >Linux home.kota.moe 6.12.32-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.12.32-1 >(2025-06-07) x86_64 GNU/Linux |
From: AreYouLoco? <are...@pa...> - 2025-07-02 18:43:11
|
Hi, I am observing similar problems reported already to the list also with Agere but with FW643 and FW643E chipsets. Similarity here is OPs probes with unloading firewire-ohci. I am doing same attempts to restore detection of devices. And truely I couldn't find a pattern what combination works. And sometimes modprobe -r firewire-ohci hangs for me as well and also normal reboot in that case hangs as well. I just do 10seconds power button then. So there seem to be some pattern with Agere chipsets not cooperating. May I ask OP: which kernel version are you on? On June 29, 2025 2:01:19 PM UTC, Takashi Sakamoto <o-t...@sa...> wrote: >Hi, > >On Sun, Jun 29, 2025 at 02:32:49PM +1000, 小太 wrote: >> The Firewire adapter is an old HP HI349 PCIe card (lspci info below), and >> it's connected to an M-Audio Profire 610 audio interface. >> CPU is a AMD 3960X on a TRX40 PRO WIFI motherboard > >This is the first time I've been involved with Agere FW533. Anyway >thanks for the information. > >> The hardware generally only stops responding when lights or exhaust fans >> are turned off on the same electrical circuit as the computer, but it's >> very infrequent. Perhaps from EMI or a voltage spike...? >> I originally had a generic chinese adapter (don't have lspci details, >> sorry) that I swapped out because it was a much more frequent issue on that >> one, and it mostly went away with the HP adapter. >> >> After the failure occurs, I have to power cycle the computer a few times >> before everything is working again (for both fw0 and fw1 devices to be >> detected). >> If I don't keep on power cycling until it works again, but instead: >> 1. Disconnect the Profire 610 (fw1) >> 2. Immediately power cycle >> 3. --> The Firewire adapter (fw0) gets detected fine >> 4. Unload firewire_ohci (modprobe -r firewire_ohci) >> 5. Reconnect the Profire 610 >> 6. Load firewire_ohci (modprobe firewire_ohci) >> 7. --> The Firewire adapter is not detected at all >> 8. 
Unload firewire_ohci again (modprobe -r firewire_ohci) >> 9. Disconnect the Profire 610 >> 10. Load firewire_ohci again (modprobe firewire_ohci) >> 11. --> The Firewire adapter is detected again >> >> If I mess around with the combination of reloading the firewire_ohci >> module, connecting/disconnecting the Profire 610, and probing things with >> FFADO, eventually a modprobe hangs indefinitely on wait_for_completion() in >> the kernel with the dmesg logs in my original message. >> Sometimes it even prevents systemd from powering off the computer (because >> it's waiting for modprobe to exit from a SIGKILL...), which is why I wanted >> to update the driver > >Two kernel messages are in your first post. The first one records that >the system does not execute interrupt service routine for hardIRQ which >should be triggered against any command request to transfer asynchronous packet >when retrieving the detected node device. The second one records that the >reference count of fw_card instance could not reach zero even if waiting so >long when modprobe(8) executes the delete_module system call. > >I guess the cause of later message is the lost of hardIRQ in the first >message, however, I would like you to compare timestamps for these >messages. > > >Thanks > >Takashi Sakamoto > > >_______________________________________________ >mailing list lin...@li... >https://lists.sourceforge.net/lists/listinfo/linux1394-devel |
From: Takashi S. <o-t...@sa...> - 2025-07-01 22:44:44
|
Hi, On Mon, Jun 30, 2025 at 12:10:15AM +1000, 小太 wrote: > On Mon, 30 Jun 2025 at 00:01, Takashi Sakamoto <o-t...@sa...> > wrote: > > > > > Two kernel messages are in your first post. The first one records that > > the system does not execute interrupt service routine for hardIRQ which > > should be triggered against any command request to transfer asynchronous > > packet > > when retrieving the detected node device. The second one records that the > > reference count of fw_card instance could not reach zero even if waiting so > > long when modprobe(8) executes the delete_module system call. > > > > Would it make sense to convert the wait_for_completion() > to wait_for_completion_timeout() in either case? > Potentially could leak a reference count, but it's better than a hung task > that prevents system shutdown... > > > > > > I guess the cause of later message is the lost of hardIRQ in the first > > message, however, I would like you to compare timestamps for these > > messages. > > > IIRC the two backtraces came from two completely different boots of the > kernel with different failure symptoms. > So probably unrelated to each other (except for the non-responsive hardware) Hm, these are independent... If that's the case, the issue might be partly due to how the userspace application is written. Typically, it is written not to call close(2) to the file descriptor of FireWire character device even if either any operations to it returns -ENODEV or it never receives the response of asynchronous transaction. The replacement by the kernel API to wait for any event with timeout is a simple solution, if we can ignore the apparent leak of system resources. Indeed, it is not preferable, you know. I think what we should do now is to identify/estimate the cause of the issue, especially for the incomplete transaction. At present, the transaction service of Linux kernel relies on the corresponding functionality on 1394 OHCI hardware. 
We assume that the hardware notifies the completion of a transaction (including both transmission errors and timeouts) by triggering an IRQ (more precisely, the corresponding software driver programs the hardware to work so). If we need any software timeout to finish the transaction, it means that the combination of hardware (I mean the 1394 OHCI hardware, AMD PCIe Root Complex, AMD IO Hub, and so on) is rather lazy in that respect. For example, I know an issue related to the critical quirk of the AMD Ryzen PCIe functionality and extension cards with ASM108x/VT630x [1]. AMD Ryzen machines have trouble handling old hardware. I guess that your issue is somehow similar to this kind of issue. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/drivers/firewire/ohci.c?id=ac9184fbb847 Regards Takashi Sakamoto |
From: Takashi S. <o-t...@sa...> - 2025-06-29 14:01:37
|
Hi, On Sun, Jun 29, 2025 at 02:32:49PM +1000, 小太 wrote: > The Firewire adapter is an old HP HI349 PCIe card (lspci info below), and > it's connected to an M-Audio Profire 610 audio interface. > CPU is a AMD 3960X on a TRX40 PRO WIFI motherboard This is the first time I've been involved with Agere FW533. Anyway thanks for the information. > The hardware generally only stops responding when lights or exhaust fans > are turned off on the same electrical circuit as the computer, but it's > very infrequent. Perhaps from EMI or a voltage spike...? > I originally had a generic chinese adapter (don't have lspci details, > sorry) that I swapped out because it was a much more frequent issue on that > one, and it mostly went away with the HP adapter. > > After the failure occurs, I have to power cycle the computer a few times > before everything is working again (for both fw0 and fw1 devices to be > detected). > If I don't keep on power cycling until it works again, but instead: > 1. Disconnect the Profire 610 (fw1) > 2. Immediately power cycle > 3. --> The Firewire adapter (fw0) gets detected fine > 4. Unload firewire_ohci (modprobe -r firewire_ohci) > 5. Reconnect the Profire 610 > 6. Load firewire_ohci (modprobe firewire_ohci) > 7. --> The Firewire adapter is not detected at all > 8. Unload firewire_ohci again (modprobe -r firewire_ohci) > 9. Disconnect the Profire 610 > 10. Load firewire_ohci again (modprobe firewire_ohci) > 11. --> The Firewire adapter is detected again > > If I mess around with the combination of reloading the firewire_ohci > module, connecting/disconnecting the Profire 610, and probing things with > FFADO, eventually a modprobe hangs indefinitely on wait_for_completion() in > the kernel with the dmesg logs in my original message. > Sometimes it even prevents systemd from powering off the computer (because > it's waiting for modprobe to exit from a SIGKILL...), which is why I wanted > to update the driver Two kernel messages are in your first post. 
The first one records that the system does not execute the interrupt service routine for the hardIRQ which should be triggered against any command request to transfer an asynchronous packet when retrieving the detected node device. The second one records that the reference count of the fw_card instance could not reach zero even after waiting for a long time when modprobe(8) executes the delete_module system call. I guess the cause of the latter message is the loss of the hardIRQ in the first message; however, I would like you to compare the timestamps of these messages. Thanks Takashi Sakamoto |
From: Takashi S. <o-t...@sa...> - 2025-06-29 02:48:53
|
Hi, I'm sorry for the late reply; I was traveling. On Mon, Jun 23, 2025 at 05:53:47PM +1000, 小太 wrote: > Hello, I tried sending an email (see below) to > lin...@li... a few days ago but it hasn't showed > up on the archive: > https://sourceforge.net/p/linux1394/mailman/linux1394-devel/?viewmonth=202506 > Is it awaiting moderation or got dropped as spam? Did I send it wrong? The moderation of the list is kind of outside what I usually deal with, unfortunately. Additionally, the moderators of the list have been inactive for a long time. > ---------- Forwarded message --------- > From: 小太 <no...@ko...e> > Date: Fri, 20 Jun 2025 at 19:23 > Subject: Re: Adding timeouts to device access? > To: <lin...@li...> > > > On Fri, 20 Jun 2025 at 19:17, 小太 <no...@ko...e> wrote: > > > Hello, I've been trying to debug an odd issue with my hardware where it > > sometimes just stops responding (hardware failure is my guess at the > > moment). > > This resulted in hung tasks in the kernel that sometimes even prevents the > > OS from cleanly shutting down > > > > In particular, I believe these lines are prone to hanging indefinitely in > > the event the hardware fails to respond: > > - > > https://github.com/torvalds/linux/blob/75f5f23f8787c5e184fcb2fbcd02d8e9317dc5e7/drivers/firewire/core-card.c#L753 > > - > > https://github.com/torvalds/linux/blob/75f5f23f8787c5e184fcb2fbcd02d8e9317dc5e7/drivers/firewire/core-transaction.c#L433 > > > > Would it make sense to turn these into wait_for_completion_timeout() or > > even wait_for_completion_interruptible_timeout() instead? > > If so, some questions: > > - What would be a reasonable timeout value? 1 second? Less? > > - If fw_run_transaction() times out, what should it > > return? RCODE_CANCELLED? > > > > Whoops, I was meant to include the hung task logs but forgot. Here they are > > (Snip) May I ask about the situation in which you face the issue? In particular, what kind of 1394 OHCI hardware do you use? Thanks Takashi Sakamoto |
From: AreYouLoco? <are...@pa...> - 2025-06-20 18:34:51
|
So here it happened again. Kernel 6.12.32-rt-amd64 (Debian Trixie): Here is log after soft reboot: ``` areyouloco@studio:~$ sudo dmesg -w | grep -i firewire [ 1.120409] firewire_ohci 0000:03:00.0: added OHCI v1.10 device as card 0, 8 IR + 8 IT contexts, quirks 0x0, physUB [ 17.057643] libahci sha256_ssse3 sha1_ssse3 xhci_pci libata xhci_hcd ehci_pci iTCO_wdt sdhci_pci intel_pmc_bxt firewire_ohci ehci_hcd cqhci iTCO_vendor_support aesni_intel tg3 sdhci firewire_core scsi_mod gf128mul usbcore watchdog thunderbolt crypto_simd i2c_i801 mmc_core libphy cryptd i2c_smbus crc_itu_t lpc_ich scsi_common usb_common video wmi ``` And thats it related to firewire. Notice. No mention of fw0 or OHCI device added. Now I do: ``` areyouloco@studio:~$ sudo modprobe -r firewire_ohci areyouloco@studio:~$ sudo modprobe -r firewire_core areyouloco@studio:~$ sleep 2 areyouloco@studio:~$ sudo modprobe firewire-ohci ``` And then in dmesg I get: ``` [ 141.661165] firewire_ohci 0000:03:00.0: removing fw-ohci device [ 141.661261] firewire_ohci 0000:03:00.0: removed fw-ohci device [ 155.721784] firewire_ohci 0000:03:00.0: added OHCI v1.10 device as card 0, 8 IR + 8 IT contexts, quirks 0x0, physUB [ 156.245972] firewire_core 0000:03:00.0: created device fw0: GUID 003ee1fffe873194, S800 [ 156.245992] firewire_core 0000:03:00.0: phy config: new root=ffc1, gap_count=5 [ 159.293474] firewire_core 0000:03:00.0: created device fw1: GUID 000ff2050000145b, S400 ``` This is all I got for now. For firewire-ohci module in my kernel I see parameter possible to set: "parm: debug:Verbose logging, deprecated in v6.11 kernel or later. (default = 0, AT/AR events = 1, self-IDs = 2, IRQs = 4, or a combination, or all = -1) (int)" Is it still a way to debug further? I've read somewhere there was a replacement for this debug parameter via some calls. Machine is MacBook Pro Mid-2012 13''. But I also got other MBP and some full-size PCIe card also with that same chipset. 
On the other MBP behavior is similar I didn't check with PC yet. I might do that if requested. Hope it helps anyhow. I am getting kernel 6.15 to try out. On 6/20/25 3:12 PM, Takashi Sakamoto wrote: > Hi, > > On Fri, Jun 20, 2025 at 08:52:42AM +0930, Jonathan Woithe wrote: >> Alternatively, are there any FW643 users who have no problem with device >> detection under a 6.12.x (or later) kernel? If so this would conclusively >> rule out a kernel issue. > I can not regenerate the issue with the kernel modules provided by the > stock kernel in Ubuntu 25.04 amd64 (v6.14). The machine consists of > > * ASUSTeK COMPUTER INC. TUF GAMING X570-PLUS > * AMD Ryzen™ 7 5700G with Radeon™ Graphics × 16 > > > ``` > $ dpkg -S /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/* > linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-core.ko.zst > linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-net.ko.zst > linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-ohci.ko.zst > linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-sbp2.ko.zst > linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/nosy.ko.zst > > $ sudo lspci -vvnn > 06:00.0 FireWire (IEEE 1394) [0c00]: LSI Corporation FW643 [TrueFire] PCIe 1394b Controller [11c1:5901] (rev 06) (prog-if 10 [OHCI]) > Subsystem: LSI Corporation FW643 [TrueFire] PCIe 1394b Controller [11c1:5900] > Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- > Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx- > Latency: 0, Cache Line Size: 64 bytes > Interrupt: pin A routed to IRQ 40 > IOMMU group: 20 > Region 0: Memory at fc600000 (64-bit, non-prefetchable) [size=4K] > Capabilities: [44] Power Management 
version 3 > Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA PME(D0+,D1+,D2+,D3hot+,D3cold+) > Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=0 PME+ > Capabilities: [4c] MSI: Enable- Count=1/1 Maskable- 64bit+ > Address: 0000000000000000 Data: 0000 > Capabilities: [60] Express (v1) Endpoint, IntMsgNum 0 > DevCap: MaxPayload 256 bytes, PhantFunc 0, Latency L0s <4us, L1 <64us > ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset- SlotPowerLimit 0W TEE-IO- > DevCtl: CorrErr+ NonFatalErr+ FatalErr+ UnsupReq+ > RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop- > MaxPayload 128 bytes, MaxReadReq 512 bytes > DevSta: CorrErr- NonFatalErr- FatalErr- UnsupReq- AuxPwr+ TransPend- > LnkCap: Port #0, Speed 2.5GT/s, Width x1, ASPM L0s L1, Exit Latency L0s <512ns, L1 <64us > ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp- > LnkCtl: ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk+ > ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt- > LnkSta: Speed 2.5GT/s, Width x1 > TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt- > Capabilities: [100 v1] Advanced Error Reporting > UESta: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- > ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- > PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked- > UEMsk: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- > ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- > PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked- > UESvrt: DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+ > ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- > PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked- > CESta: RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr- CorrIntErr- HeaderOF- > CEMsk: RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF- > AERCap: First Error Pointer: 
00, ECRCGenCap+ ECRCGenEn- ECRCChkCap+ ECRCChkEn- > MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap- > HeaderLog: 00000000 00000000 00000000 00000000 > Capabilities: [140 v1] Virtual Channel > Caps: LPEVC=0 RefClk=100ns PATEntryBits=1 > Arb: Fixed- WRR32- WRR64- WRR128- > Ctrl: ArbSelect=Fixed > Status: InProgress- > VC0: Caps: PATOffset=00 MaxTimeSlots=1 RejSnoopTrans- > Arb: Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256- > Ctrl: Enable+ ID=0 ArbSelect=Fixed TC/VC=01 > Status: NegoPending- InProgress- > VC1: Caps: PATOffset=00 MaxTimeSlots=1 RejSnoopTrans- > Arb: Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256- > Ctrl: Enable- ID=1 ArbSelect=Fixed TC/VC=00 > Status: NegoPending- InProgress- > Capabilities: [170 v1] Device Serial Number 12-34-56-10-12-30-00-86 > Kernel driver in use: firewire_ohci > Kernel modules: firewire_ohci > > ``` > > My Phonic FireFly 808 Universal is detected successfully even after the > reported steps. > > > Thanks > > Takashi Sakamoto > > > _______________________________________________ > mailing list lin...@li... > https://lists.sourceforge.net/lists/listinfo/linux1394-devel |
From: AreYouLoco? <are...@pa...> - 2025-06-20 13:32:39
|
Hi Takashi, But it's not the PCI FireWire controller itself that fails to be detected; rather, the root /dev/fw0 sometimes won't appear, and consequently neither do the connected devices /dev/fw1 and /dev/fw2. Could you please check the dmesg log a few times to see whether the GUID for the device is there? Cold boot versus soft reboot might make a difference here. Good that you have this chipset around. Hope you can reproduce it. Cheers On June 20, 2025 1:12:49 PM UTC, Takashi Sakamoto <o-t...@sa...> wrote: >Hi, > >On Fri, Jun 20, 2025 at 08:52:42AM +0930, Jonathan Woithe wrote: >> Alternatively, are there any FW643 users who have no problem with device >> detection under a 6.12.x (or later) kernel? If so this would conclusively >> rule out a kernel issue. > >I can not regenerate the issue with the kernel modules provided by the >stock kernel in Ubuntu 25.04 amd64 (v6.14). The machine consists of > >* ASUSTeK COMPUTER INC. TUF GAMING X570-PLUS >* AMD Ryzen™ 7 5700G with Radeon™ Graphics × 16 > > >``` >$ dpkg -S /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/* >linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-core.ko.zst >linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-net.ko.zst >linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-ohci.ko.zst >linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-sbp2.ko.zst >linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/nosy.ko.zst > >$ sudo lspci -vvnn >06:00.0 FireWire (IEEE 1394) [0c00]: LSI Corporation FW643 [TrueFire] PCIe 1394b Controller [11c1:5901] (rev 06) (prog-if 10 [OHCI]) > Subsystem: LSI Corporation FW643 [TrueFire] PCIe 1394b Controller [11c1:5900] > Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- > Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast 
>TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx- > Latency: 0, Cache Line Size: 64 bytes > Interrupt: pin A routed to IRQ 40 > IOMMU group: 20 > Region 0: Memory at fc600000 (64-bit, non-prefetchable) [size=4K] > Capabilities: [44] Power Management version 3 > Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA PME(D0+,D1+,D2+,D3hot+,D3cold+) > Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=0 PME+ > Capabilities: [4c] MSI: Enable- Count=1/1 Maskable- 64bit+ > Address: 0000000000000000 Data: 0000 > Capabilities: [60] Express (v1) Endpoint, IntMsgNum 0 > DevCap: MaxPayload 256 bytes, PhantFunc 0, Latency L0s <4us, L1 <64us > ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset- SlotPowerLimit 0W TEE-IO- > DevCtl: CorrErr+ NonFatalErr+ FatalErr+ UnsupReq+ > RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop- > MaxPayload 128 bytes, MaxReadReq 512 bytes > DevSta: CorrErr- NonFatalErr- FatalErr- UnsupReq- AuxPwr+ TransPend- > LnkCap: Port #0, Speed 2.5GT/s, Width x1, ASPM L0s L1, Exit Latency L0s <512ns, L1 <64us > ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp- > LnkCtl: ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk+ > ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt- > LnkSta: Speed 2.5GT/s, Width x1 > TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt- > Capabilities: [100 v1] Advanced Error Reporting > UESta: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- > ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- > PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked- > UEMsk: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- > ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- > PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked- > UESvrt: DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+ > ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- > PoisonTLPBlocked- DMWrReqBlocked- IDECheck- 
MisIDETLP- PCRC_CHECK- TLPXlatBlocked- > CESta: RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr- CorrIntErr- HeaderOF- > CEMsk: RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF- > AERCap: First Error Pointer: 00, ECRCGenCap+ ECRCGenEn- ECRCChkCap+ ECRCChkEn- > MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap- > HeaderLog: 00000000 00000000 00000000 00000000 > Capabilities: [140 v1] Virtual Channel > Caps: LPEVC=0 RefClk=100ns PATEntryBits=1 > Arb: Fixed- WRR32- WRR64- WRR128- > Ctrl: ArbSelect=Fixed > Status: InProgress- > VC0: Caps: PATOffset=00 MaxTimeSlots=1 RejSnoopTrans- > Arb: Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256- > Ctrl: Enable+ ID=0 ArbSelect=Fixed TC/VC=01 > Status: NegoPending- InProgress- > VC1: Caps: PATOffset=00 MaxTimeSlots=1 RejSnoopTrans- > Arb: Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256- > Ctrl: Enable- ID=1 ArbSelect=Fixed TC/VC=00 > Status: NegoPending- InProgress- > Capabilities: [170 v1] Device Serial Number 12-34-56-10-12-30-00-86 > Kernel driver in use: firewire_ohci > Kernel modules: firewire_ohci > >``` > >My Phonic FireFly 808 Universal is detected successfully even after the >reported steps. > > >Thanks > >Takashi Sakamoto > > >_______________________________________________ >mailing list lin...@li... >https://lists.sourceforge.net/lists/listinfo/linux1394-devel |
From: Takashi S. <o-t...@sa...> - 2025-06-20 13:13:01
|
Hi, On Fri, Jun 20, 2025 at 08:52:42AM +0930, Jonathan Woithe wrote: > Alternatively, are there any FW643 users who have no problem with device > detection under a 6.12.x (or later) kernel? If so this would conclusively > rule out a kernel issue. I can not regenerate the issue with the kernel modules provided by the stock kernel in Ubuntu 25.04 amd64 (v6.14). The machine consists of * ASUSTeK COMPUTER INC. TUF GAMING X570-PLUS * AMD Ryzen™ 7 5700G with Radeon™ Graphics × 16 ``` $ dpkg -S /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/* linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-core.ko.zst linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-net.ko.zst linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-ohci.ko.zst linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/firewire-sbp2.ko.zst linux-modules-extra-6.14.0-15-generic: /lib/modules/6.14.0-15-generic/kernel/drivers/firewire/nosy.ko.zst $ sudo lspci -vvnn 06:00.0 FireWire (IEEE 1394) [0c00]: LSI Corporation FW643 [TrueFire] PCIe 1394b Controller [11c1:5901] (rev 06) (prog-if 10 [OHCI]) Subsystem: LSI Corporation FW643 [TrueFire] PCIe 1394b Controller [11c1:5900] Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx- Latency: 0, Cache Line Size: 64 bytes Interrupt: pin A routed to IRQ 40 IOMMU group: 20 Region 0: Memory at fc600000 (64-bit, non-prefetchable) [size=4K] Capabilities: [44] Power Management version 3 Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA PME(D0+,D1+,D2+,D3hot+,D3cold+) Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=0 PME+ Capabilities: [4c] MSI: Enable- Count=1/1 Maskable- 64bit+ Address: 0000000000000000 Data: 0000 Capabilities: [60] 
Express (v1) Endpoint, IntMsgNum 0 DevCap: MaxPayload 256 bytes, PhantFunc 0, Latency L0s <4us, L1 <64us ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset- SlotPowerLimit 0W TEE-IO- DevCtl: CorrErr+ NonFatalErr+ FatalErr+ UnsupReq+ RlxdOrd+ ExtTag- PhantFunc- AuxPwr- NoSnoop- MaxPayload 128 bytes, MaxReadReq 512 bytes DevSta: CorrErr- NonFatalErr- FatalErr- UnsupReq- AuxPwr+ TransPend- LnkCap: Port #0, Speed 2.5GT/s, Width x1, ASPM L0s L1, Exit Latency L0s <512ns, L1 <64us ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp- LnkCtl: ASPM Disabled; RCB 64 bytes, LnkDisable- CommClk+ ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt- LnkSta: Speed 2.5GT/s, Width x1 TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt- Capabilities: [100 v1] Advanced Error Reporting UESta: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked- UEMsk: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt- RxOF- MalfTLP- ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked- UESvrt: DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt- RxOF+ MalfTLP+ ECRC- UnsupReq- ACSViol- UncorrIntErr- BlockedTLP- AtomicOpBlocked- TLPBlockedErr- PoisonTLPBlocked- DMWrReqBlocked- IDECheck- MisIDETLP- PCRC_CHECK- TLPXlatBlocked- CESta: RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr- CorrIntErr- HeaderOF- CEMsk: RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+ CorrIntErr- HeaderOF- AERCap: First Error Pointer: 00, ECRCGenCap+ ECRCGenEn- ECRCChkCap+ ECRCChkEn- MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap- HeaderLog: 00000000 00000000 00000000 00000000 Capabilities: [140 v1] Virtual Channel Caps: LPEVC=0 RefClk=100ns PATEntryBits=1 Arb: Fixed- WRR32- WRR64- WRR128- Ctrl: ArbSelect=Fixed Status: InProgress- VC0: Caps: 
PATOffset=00 MaxTimeSlots=1 RejSnoopTrans- Arb: Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256- Ctrl: Enable+ ID=0 ArbSelect=Fixed TC/VC=01 Status: NegoPending- InProgress- VC1: Caps: PATOffset=00 MaxTimeSlots=1 RejSnoopTrans- Arb: Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256- Ctrl: Enable- ID=1 ArbSelect=Fixed TC/VC=00 Status: NegoPending- InProgress- Capabilities: [170 v1] Device Serial Number 12-34-56-10-12-30-00-86 Kernel driver in use: firewire_ohci Kernel modules: firewire_ohci ``` My Phonic FireFly 808 Universal is detected successfully even after the reported steps. Thanks Takashi Sakamoto |
From: Jonathan W. <jw...@ju...> - 2025-06-20 10:00:04
|
On Fri, Jun 20, 2025 at 09:32:57AM +0000, AreYouLoco? wrote: > Good you reported it even before I did. Thanks for adding your input. > I am willing to build kernels and test patches. Oh ok, that's awesome! Would you be willing to do what's called a "git bisect" to discover the kernel that faulted? In simplified terms the process is this: 1. Identify a kernel that works (6.6 in our case) and one that doesn't (6.12 for the present situation). Call one that works version W and the one that doesn't F. 2. Choose a kernel half way between W and F. Build and test. 3. If the new kernel works, set W to that kernel version. If the new kernel fails, set F to that kernel version. 4. Go to step 2 until the earliest failing version is found. In practice the "versions" used here are git commits but the principle remains the same. The git revision control system also greatly simplifies this by automating the management of the versions. Steps 2-4 come down to answering a "did it work" question and recompiling/testing a new kernel. If you do a web search on "linux kernel git bisect" you'll find many tutorials which walk you through the process. It would be *really* appreciated if you could give this a go as it should identify the precise kernel commit that caused the problem. I'm happy to assist off-list if required. Regards jonathan |
From: AreYouLoco? <are...@pa...> - 2025-06-20 09:33:10
|
Hi Jonathan, Good that you reported it even before I did. I have 3 different systems with FW643 and the problem persists across all of them. I also have an old ThinkPad with a TI chipset ExpressCard; there is no discoverable issue there. So I am quite certain it's chipset-specific here. Here is another report involving the same problematic chipset, from a user who was not aware that he had also used the same one in several tests: <https://github.com/takaswie/linux-firewire-dkms/issues/59> Thank you for your interest. I am willing to build kernels and test patches. On June 19, 2025 11:22:42 PM UTC, Jonathan Woithe <jw...@ju...> wrote: >Hi all > >The FFADO project has received several reports from people running recent >kernels which suggest the kernel has stopped detecting bus resets when a >PCIe FW643 host controller is in use, and (perhaps as a result) is failing >to enumerate devices on the bus. This was first reported by a user running >Fedora 42. When running Fedora 41 their system would detect firewire >devices without any issues, and /dev/fw1 was created. When they upgraded to >Fedora 42, however, /dev/fw1 wasn't being created and the system logs didn't >contain any messages from the firefire subsystem to indicate that any >devices had been seen. > >Another user running Endeavour OS also reported the same trouble. > >The failure to detect devices applied when devices were connected at boot >and if they were plugged in sometime after booting. > >Follow up discussions suggest that kernel 6.6 is okay while 6.12 and all >versions which follow it are not. The problem only seems to affect users >with FW643 host controllers. The precise kernel version that first >displayed the symptoms is not yet identified. 
> >For those affected by this issue and running kernel 6.14.5, the following >command sequence usually results in a correctly functioning firewire system: > > sudo modprobe -r firewire-ohci > sudo modprobe -r firewire-core > sleep 2 > sudo modprobe firewire-ohci > >It has been reported that this does not work in 6.14.3 or 5.14.4. > >Since the reporters are not developers, the kernels they have access to are >limited to those supported out of the box by their distributions. A git >bisect will only be possible if someone with the requisite knowledge can >also reproduce the problem. The information to hand at present is therefore >somewhat incomplete. It may take some time to obtain a clear picture of >what might be going on. > >Unfortunately I don't personally have a FW643 host controller so I am not >able to investigate this myself. For what it's worth, the 6.12.26 kernel is >fine for me when using a TI PCI host controller. > >At this stage I was wondering whether there have been any changes between >the 6.6 and 6.12 kernels within the firewire subsystem (or maybe the PCIe >subsystem) which could have inadvertently caused this issue with FW643 host >controllers. > >Alternatively, are there any FW643 users who have no problem with device >detection under a 6.12.x (or later) kernel? If so this would conclusively >rule out a kernel issue. > >Regards > jonathan > > >_______________________________________________ >mailing list lin...@li... >https://lists.sourceforge.net/lists/listinfo/linux1394-devel |
From: Jonathan W. <jw...@ju...> - 2025-06-19 23:22:59
|
Hi all The FFADO project has received several reports from people running recent kernels which suggest the kernel has stopped detecting bus resets when a PCIe FW643 host controller is in use, and (perhaps as a result) is failing to enumerate devices on the bus. This was first reported by a user running Fedora 42. When running Fedora 41 their system would detect firewire devices without any issues, and /dev/fw1 was created. When they upgraded to Fedora 42, however, /dev/fw1 wasn't being created and the system logs didn't contain any messages from the firewire subsystem to indicate that any devices had been seen. Another user running Endeavour OS also reported the same trouble. The failure to detect devices applied both when devices were connected at boot and when they were plugged in some time after booting. Follow-up discussions suggest that kernel 6.6 is okay while 6.12 and all versions which follow it are not. The problem only seems to affect users with FW643 host controllers. The precise kernel version that first displayed the symptoms is not yet identified. For those affected by this issue and running kernel 6.14.5, the following command sequence usually results in a correctly functioning firewire system: sudo modprobe -r firewire-ohci sudo modprobe -r firewire-core sleep 2 sudo modprobe firewire-ohci It has been reported that this does not work in 6.14.3 or 6.14.4. Since the reporters are not developers, the kernels they have access to are limited to those supported out of the box by their distributions. A git bisect will only be possible if someone with the requisite knowledge can also reproduce the problem. The information to hand at present is therefore somewhat incomplete. It may take some time to obtain a clear picture of what might be going on. Unfortunately I don't personally have a FW643 host controller so I am not able to investigate this myself. For what it's worth, the 6.12.26 kernel is fine for me when using a TI PCI host controller. 
At this stage I was wondering whether there have been any changes between the 6.6 and 6.12 kernels within the firewire subsystem (or maybe the PCIe subsystem) which could have inadvertently caused this issue with FW643 host controllers. Alternatively, are there any FW643 users who have no problem with device detection under a 6.12.x (or later) kernel? If so this would conclusively rule out a kernel issue. Regards jonathan |
From: Takashi S. <o-t...@sa...> - 2025-06-18 23:47:42
|
On Tue, Jun 17, 2025 at 09:43:20AM +0900, Takashi Sakamoto wrote: > The table for gap count is accessed by a single function. In this case, > it can be localized to the function. > > Signed-off-by: Takashi Sakamoto <o-t...@sa...> > --- > drivers/firewire/core-card.c | 7 +++---- > 1 file changed, 3 insertions(+), 4 deletions(-) Applied to for-next branch. Thanks Takashi Sakamoto |
From: Takashi S. <o-t...@sa...> - 2025-06-17 00:43:35
|
The table for gap count is accessed by a single function. In this case, it can be localized to the function. Signed-off-by: Takashi Sakamoto <o-t...@sa...> --- drivers/firewire/core-card.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index b3e48ca516fe..aae774e7a5c3 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -273,10 +273,6 @@ static void allocate_broadcast_channel(struct fw_card *card, int generation) fw_device_set_broadcast_channel); } -static const char gap_count_table[] = { - 63, 5, 7, 8, 10, 13, 16, 18, 21, 24, 26, 29, 32, 35, 37, 40 -}; - void fw_schedule_bm_work(struct fw_card *card, unsigned long delay) { fw_card_get(card); @@ -286,6 +282,9 @@ void fw_schedule_bm_work(struct fw_card *card, unsigned long delay) static void bm_work(struct work_struct *work) { + static const char gap_count_table[] = { + 63, 5, 7, 8, 10, 13, 16, 18, 21, 24, 26, 29, 32, 35, 37, 40 + }; struct fw_card *card = from_work(card, work, bm_work.work); struct fw_device *root_device, *irm_device; struct fw_node *root_node; base-commit: aef6bcc0f278eba408751f8b3e0beae992e9faec -- 2.48.1 |
From: Takashi S. <o-t...@sa...> - 2025-06-17 00:30:59
|
Hi, On Sun, Jun 15, 2025 at 10:32:50PM +0900, Takashi Sakamoto wrote: > Hi, > > This is the revised version of v1 patchset[1]. > > Last year, in Linux kernel v6.12, the bottom-halves for isochronous > contexts of 1394 OHCI PCI driver were changed to use workqueue instead of > tasklet (softIRQ)[2]. I have received no reports of any issues related to the > change until today. Therefore, I believe it's time to move on to the next > step. > > This patchset updates the driver to use a regular workqueue (not WQ_BH) to > handle 1394 OHCI AT/AR context events. Unlike isochronous contexts, the > asynchronous contexts are used by the implementation of the SCSI over > IEEE 1394 protocol (sbp2). The workqueue is allocated with WQ_MEM_RECLAIM > flag so that it can still participate in memory reclaim paths. > > With this change, all remaining uses of tasklets in the subsystem are > completely removed. > > [1] https://lore.kernel.org/lkml/202...@sa.../ > [2] https://lore.kernel.org/lkml/202...@sa.../ > > > Changes from v1 patchset: > > * Fix "error: cannot jump from this goto statement to its label" > * https://lore.kernel.org/lkml/202...@sa.../ > * Fix indentations. > > Takashi Sakamoto (3): > firewire: core: allocate workqueue for AR/AT request/response contexts > firewire: ohci: use workqueue to handle events of AR request/response > contexts > firewire: ohci: use workqueue to handle events of AT request/response > contexts > > drivers/firewire/core-card.c | 48 +++++++++++++++------ > drivers/firewire/core-transaction.c | 7 +-- > drivers/firewire/net.c | 4 +- > drivers/firewire/ohci.c | 67 +++++++++++++++-------------- > include/linux/firewire.h | 12 +++++- > 5 files changed, 85 insertions(+), 53 deletions(-) Applied to for-next branch. Thanks Takashi Sakamoto |
From: Takashi S. <o-t...@sa...> - 2025-06-15 13:33:15
|
This commit adds a work item to handle events of 1394 OHCI AT request/response contexts, and queues the item to the specific workqueue. The call of struct fw_packet.callback() is done in the workqueue when receiving acknowledgement to the asynchronous packet transferred to the remote node. Signed-off-by: Takashi Sakamoto <o-t...@sa...> --- drivers/firewire/net.c | 4 ++-- drivers/firewire/ohci.c | 40 ++++++++++++++++++++++++---------------- include/linux/firewire.h | 11 +++++++++-- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 1bf0e15c1540..6d6446713539 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -1007,7 +1007,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) spin_lock_irqsave(&dev->lock, flags); - /* If the AT tasklet already ran, we may be last user. */ + /* If the AT work item already ran, we may be last user. */ free = (ptask->outstanding_pkts == 0 && !ptask->enqueued); if (!free) ptask->enqueued = true; @@ -1026,7 +1026,7 @@ static int fwnet_send_packet(struct fwnet_packet_task *ptask) spin_lock_irqsave(&dev->lock, flags); - /* If the AT tasklet already ran, we may be last user. */ + /* If the AT work item already ran, we may be last user. 
*/ free = (ptask->outstanding_pkts == 0 && !ptask->enqueued); if (!free) ptask->enqueued = true; diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 68317b5a64a7..709a714fd5c8 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -158,7 +158,7 @@ struct context { descriptor_callback_t callback; - struct tasklet_struct tasklet; + struct work_struct work; }; struct iso_context { @@ -1176,9 +1176,9 @@ static void context_retire_descriptors(struct context *ctx) } } -static void context_tasklet(unsigned long data) +static void ohci_at_context_work(struct work_struct *work) { - struct context *ctx = (struct context *) data; + struct context *ctx = from_work(ctx, work, work); context_retire_descriptors(ctx); } @@ -1243,7 +1243,6 @@ static int context_init(struct context *ctx, struct fw_ohci *ohci, ctx->buffer_tail = list_entry(ctx->buffer_list.next, struct descriptor_buffer, list); - tasklet_init(&ctx->tasklet, context_tasklet, (unsigned long)ctx); ctx->callback = callback; /* @@ -1524,13 +1523,17 @@ static int at_context_queue_packet(struct context *ctx, static void at_context_flush(struct context *ctx) { - tasklet_disable(&ctx->tasklet); + // Avoid dead lock due to programming mistake. + if (WARN_ON_ONCE(current_work() == &ctx->work)) + return; - ctx->flushing = true; - context_tasklet((unsigned long)ctx); - ctx->flushing = false; + disable_work_sync(&ctx->work); - tasklet_enable(&ctx->tasklet); + WRITE_ONCE(ctx->flushing, true); + ohci_at_context_work(&ctx->work); + WRITE_ONCE(ctx->flushing, false); + + enable_work(&ctx->work); } static int handle_at_packet(struct context *context, @@ -1542,7 +1545,7 @@ static int handle_at_packet(struct context *context, struct fw_ohci *ohci = context->ohci; int evt; - if (last->transfer_status == 0 && !context->flushing) + if (last->transfer_status == 0 && !READ_ONCE(context->flushing)) /* This descriptor isn't done yet, stop iteration. 
*/ return 0; @@ -1576,7 +1579,7 @@ static int handle_at_packet(struct context *context, break; case OHCI1394_evt_missing_ack: - if (context->flushing) + if (READ_ONCE(context->flushing)) packet->ack = RCODE_GENERATION; else { /* @@ -1598,7 +1601,7 @@ static int handle_at_packet(struct context *context, break; case OHCI1394_evt_no_status: - if (context->flushing) { + if (READ_ONCE(context->flushing)) { packet->ack = RCODE_GENERATION; break; } @@ -2239,10 +2242,10 @@ static irqreturn_t irq_handler(int irq, void *data) queue_work(ohci->card.async_wq, &ohci->ar_response_ctx.work); if (event & OHCI1394_reqTxComplete) - tasklet_schedule(&ohci->at_request_ctx.tasklet); + queue_work(ohci->card.async_wq, &ohci->at_request_ctx.work); if (event & OHCI1394_respTxComplete) - tasklet_schedule(&ohci->at_response_ctx.tasklet); + queue_work(ohci->card.async_wq, &ohci->at_response_ctx.work); if (event & OHCI1394_isochRx) { iso_event = reg_read(ohci, OHCI1394_IsoRecvIntEventClear); @@ -2684,7 +2687,10 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet) struct driver_data *driver_data = packet->driver_data; int ret = -ENOENT; - tasklet_disable_in_atomic(&ctx->tasklet); + // Avoid dead lock due to programming mistake. 
+ if (WARN_ON_ONCE(current_work() == &ctx->work)) + return 0; + disable_work_sync(&ctx->work); if (packet->ack != 0) goto out; @@ -2703,7 +2709,7 @@ static int ohci_cancel_packet(struct fw_card *card, struct fw_packet *packet) packet->callback(packet, &ohci->card, packet->ack); ret = 0; out: - tasklet_enable(&ctx->tasklet); + enable_work(&ctx->work); return ret; } @@ -3765,11 +3771,13 @@ static int pci_probe(struct pci_dev *dev, OHCI1394_AsReqTrContextControlSet, handle_at_packet); if (err < 0) return err; + INIT_WORK(&ohci->at_request_ctx.work, ohci_at_context_work); err = context_init(&ohci->at_response_ctx, ohci, OHCI1394_AsRspTrContextControlSet, handle_at_packet); if (err < 0) return err; + INIT_WORK(&ohci->at_response_ctx.work, ohci_at_context_work); reg_write(ohci, OHCI1394_IsoRecvIntMaskSet, ~0); ohci->ir_context_channels = ~0ULL; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index c55b8e30e700..cceb70415ed2 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -308,8 +308,7 @@ struct fw_packet { * For successful transmission, the status code is the ack received * from the destination. Otherwise it is one of the juju-specific * rcodes: RCODE_SEND_ERROR, _CANCELLED, _BUSY, _GENERATION, _NO_ACK. - * The callback can be called from tasklet context and thus - * must never block. + * The callback can be called from workqueue and thus must never block. */ fw_packet_callback_t callback; int ack; @@ -382,6 +381,10 @@ void __fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode * * A variation of __fw_send_request() to generate callback for response subaction without time * stamp. + * + * The callback is invoked in the workqueue context in most cases. However, if an error is detected + * before queueing or the destination address refers to the local node, it is invoked in the + * current context instead. 
*/ static inline void fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode, int destination_id, int generation, int speed, @@ -411,6 +414,10 @@ static inline void fw_send_request(struct fw_card *card, struct fw_transaction * * @callback_data: data to be passed to the transaction completion callback * * A variation of __fw_send_request() to generate callback for response subaction with time stamp. + * + * The callback is invoked in the workqueue context in most cases. However, if an error is detected + * before queueing or the destination address refers to the local node, it is invoked in the current + * context instead. */ static inline void fw_send_request_with_tstamp(struct fw_card *card, struct fw_transaction *t, int tcode, int destination_id, int generation, int speed, unsigned long long offset, -- 2.48.1 |
From: Takashi S. <o-t...@sa...> - 2025-06-15 13:33:14
|
This commit adds a work item to handle events of 1394 OHCI AR request/response contexts, and queues the item to the specific workqueue. The call of struct fw_address_handler.address_callback() is done in the workqueue when receiving any requests from the remote nodes. Additionally, the call of struct fw_packet.callback() is done in the workqueue too when receiving acknowledgement to the asynchronous packet for the response subaction of a split transaction to the remote nodes. Signed-off-by: Takashi Sakamoto <o-t...@sa...> --- drivers/firewire/core-transaction.c | 7 ++++--- drivers/firewire/ohci.c | 27 +++++++++++---------------- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 2bd5deb9054e..d28477d84697 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -557,9 +557,10 @@ const struct fw_address_region fw_unit_space_region = * * region->start, ->end, and handler->length have to be quadlet-aligned. * - * When a request is received that falls within the specified address range, - * the specified callback is invoked. The parameters passed to the callback - * give the details of the particular request. + * When a request is received that falls within the specified address range, the specified callback + * is invoked. The parameters passed to the callback give the details of the particular request. + * The callback is invoked in the workqueue context in most cases. However, if the request is + * initiated by the local node, the callback is invoked in the initiator's context. * * To be called in process context. * Return value: 0 on success, non-zero otherwise. 
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 40313a3ec63e..68317b5a64a7 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -101,7 +101,7 @@ struct ar_context { void *pointer; unsigned int last_buffer_index; u32 regs; - struct tasklet_struct tasklet; + struct work_struct work; }; struct context; @@ -1016,9 +1016,9 @@ static void ar_recycle_buffers(struct ar_context *ctx, unsigned int end_buffer) } } -static void ar_context_tasklet(unsigned long data) +static void ohci_ar_context_work(struct work_struct *work) { - struct ar_context *ctx = (struct ar_context *)data; + struct ar_context *ctx = from_work(ctx, work, work); unsigned int end_buffer_index, end_buffer_offset; void *p, *end; @@ -1026,23 +1026,19 @@ static void ar_context_tasklet(unsigned long data) if (!p) return; - end_buffer_index = ar_search_last_active_buffer(ctx, - &end_buffer_offset); + end_buffer_index = ar_search_last_active_buffer(ctx, &end_buffer_offset); ar_sync_buffers_for_cpu(ctx, end_buffer_index, end_buffer_offset); end = ctx->buffer + end_buffer_index * PAGE_SIZE + end_buffer_offset; if (end_buffer_index < ar_first_buffer_index(ctx)) { - /* - * The filled part of the overall buffer wraps around; handle - * all packets up to the buffer end here. If the last packet - * wraps around, its tail will be visible after the buffer end - * because the buffer start pages are mapped there again. - */ + // The filled part of the overall buffer wraps around; handle all packets up to the + // buffer end here. If the last packet wraps around, its tail will be visible after + // the buffer end because the buffer start pages are mapped there again. 
void *buffer_end = ctx->buffer + AR_BUFFERS * PAGE_SIZE; p = handle_ar_packets(ctx, p, buffer_end); if (p < buffer_end) goto error; - /* adjust p to point back into the actual buffer */ + // adjust p to point back into the actual buffer p -= AR_BUFFERS * PAGE_SIZE; } @@ -1057,7 +1053,6 @@ static void ar_context_tasklet(unsigned long data) ar_recycle_buffers(ctx, end_buffer_index); return; - error: ctx->pointer = NULL; } @@ -1073,7 +1068,7 @@ static int ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci, ctx->regs = regs; ctx->ohci = ohci; - tasklet_init(&ctx->tasklet, ar_context_tasklet, (unsigned long)ctx); + INIT_WORK(&ctx->work, ohci_ar_context_work); for (i = 0; i < AR_BUFFERS; i++) { ctx->pages[i] = dma_alloc_pages(dev, PAGE_SIZE, &dma_addr, @@ -2238,10 +2233,10 @@ static irqreturn_t irq_handler(int irq, void *data) } if (event & OHCI1394_RQPkt) - tasklet_schedule(&ohci->ar_request_ctx.tasklet); + queue_work(ohci->card.async_wq, &ohci->ar_request_ctx.work); if (event & OHCI1394_RSPkt) - tasklet_schedule(&ohci->ar_response_ctx.tasklet); + queue_work(ohci->card.async_wq, &ohci->ar_response_ctx.work); if (event & OHCI1394_reqTxComplete) tasklet_schedule(&ohci->at_request_ctx.tasklet); -- 2.48.1 |