From: Hoang H. Le <hoa...@de...> - 2020-10-08 07:32:43
|
In the function node_lost_contact(), we call __skb_queue_purge() without grabbing the list->lock. This can cause to a race-condition why processing the list 'namedq' in calling path tipc_named_rcv()->tipc_named_dequeue(). [] BUG: kernel NULL pointer dereference, address: 0000000000000000 [] #PF: supervisor read access in kernel mode [] #PF: error_code(0x0000) - not-present page [] PGD 7ca63067 P4D 7ca63067 PUD 6c553067 PMD 0 [] Oops: 0000 [#1] SMP NOPTI [] CPU: 1 PID: 15 Comm: ksoftirqd/1 Tainted: G O 5.9.0-rc6+ #2 [] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS [...] [] RIP: 0010:tipc_named_rcv+0x103/0x320 [tipc] [] Code: 41 89 44 24 10 49 8b 16 49 8b 46 08 49 c7 06 00 00 00 [...] [] RSP: 0018:ffffc900000a7c58 EFLAGS: 00000282 [] RAX: 00000000000012ec RBX: 0000000000000000 RCX: ffff88807bde1270 [] RDX: 0000000000002c7c RSI: 0000000000002c7c RDI: ffff88807b38f1a8 [] RBP: ffff88807b006288 R08: ffff88806a367800 R09: ffff88806a367900 [] R10: ffff88806a367a00 R11: ffff88806a367b00 R12: ffff88807b006258 [] R13: ffff88807b00628a R14: ffff888069334d00 R15: ffff88806a434600 [] FS: 0000000000000000(0000) GS:ffff888079480000(0000) knlGS:0[...] [] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [] CR2: 0000000000000000 CR3: 0000000077320000 CR4: 00000000000006e0 [] Call Trace: [] ? tipc_bcast_rcv+0x9a/0x1a0 [tipc] [] tipc_rcv+0x40d/0x670 [tipc] [] ? _raw_spin_unlock+0xa/0x20 [] tipc_l2_rcv_msg+0x55/0x80 [tipc] [] __netif_receive_skb_one_core+0x8c/0xa0 [] process_backlog+0x98/0x140 [] net_rx_action+0x13a/0x420 [] __do_softirq+0xdb/0x316 [] ? smpboot_thread_fn+0x2f/0x1e0 [] ? smpboot_thread_fn+0x74/0x1e0 [] ? smpboot_thread_fn+0x14e/0x1e0 [] run_ksoftirqd+0x1a/0x40 [] smpboot_thread_fn+0x149/0x1e0 [] ? sort_range+0x20/0x20 [] kthread+0x131/0x150 [] ? kthread_unuse_mm+0xa0/0xa0 [] ret_from_fork+0x22/0x30 [] Modules linked in: veth tipc(O) ip6_udp_tunnel udp_tunnel [...] 
[] CR2: 0000000000000000 [] ---[ end trace 65c276a8e2e2f310 ]--- To fix this, we need to grab the lock of the 'namedq' list on both path calling. Fixes: cad2929dc432 ("tipc: update a binding service via broadcast") Acked-by: Jon Maloy <jm...@re...> Signed-off-by: Hoang Huu Le <hoa...@de...> --- net/tipc/name_distr.c | 10 +++++++++- net/tipc/node.c | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 2f9c148f17e2..fe4edce459ad 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -327,8 +327,13 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, struct tipc_msg *hdr; u16 seqno; + spin_lock_bh(&namedq->lock); skb_queue_walk_safe(namedq, skb, tmp) { - skb_linearize(skb); + if (unlikely(skb_linearize(skb))) { + __skb_unlink(skb, namedq); + kfree_skb(skb); + continue; + } hdr = buf_msg(skb); seqno = msg_named_seqno(hdr); if (msg_is_last_bulk(hdr)) { @@ -338,12 +343,14 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) { __skb_unlink(skb, namedq); + spin_unlock_bh(&namedq->lock); return skb; } if (*open && (*rcv_nxt == seqno)) { (*rcv_nxt)++; __skb_unlink(skb, namedq); + spin_unlock_bh(&namedq->lock); return skb; } @@ -353,6 +360,7 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, continue; } } + spin_unlock_bh(&namedq->lock); return NULL; } diff --git a/net/tipc/node.c b/net/tipc/node.c index cf4b239fc569..d269ebe382e1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1496,7 +1496,7 @@ static void node_lost_contact(struct tipc_node *n, /* Clean up broadcast state */ tipc_bcast_remove_peer(n->net, n->bc_entry.link); - __skb_queue_purge(&n->bc_entry.namedq); + skb_queue_purge(&n->bc_entry.namedq); /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { -- 2.25.1 |
From: Hoang H. Le <hoa...@de...> - 2020-10-07 12:14:24
|
In the function node_lost_contact(), we call __skb_queue_purge() without grabbing the list->lock. This can cause to a race-condition why processing the list 'namedq' in calling path tipc_named_rcv()->tipc_named_dequeue(). [] BUG: kernel NULL pointer dereference, address: 0000000000000000 [] #PF: supervisor read access in kernel mode [] #PF: error_code(0x0000) - not-present page [] PGD 7ca63067 P4D 7ca63067 PUD 6c553067 PMD 0 [] Oops: 0000 [#1] SMP NOPTI [] CPU: 1 PID: 15 Comm: ksoftirqd/1 Tainted: G O 5.9.0-rc6+ #2 [] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS [...] [] RIP: 0010:tipc_named_rcv+0x103/0x320 [tipc] [] Code: 41 89 44 24 10 49 8b 16 49 8b 46 08 49 c7 06 00 00 00 [...] [] RSP: 0018:ffffc900000a7c58 EFLAGS: 00000282 [] RAX: 00000000000012ec RBX: 0000000000000000 RCX: ffff88807bde1270 [] RDX: 0000000000002c7c RSI: 0000000000002c7c RDI: ffff88807b38f1a8 [] RBP: ffff88807b006288 R08: ffff88806a367800 R09: ffff88806a367900 [] R10: ffff88806a367a00 R11: ffff88806a367b00 R12: ffff88807b006258 [] R13: ffff88807b00628a R14: ffff888069334d00 R15: ffff88806a434600 [] FS: 0000000000000000(0000) GS:ffff888079480000(0000) knlGS:0[...] [] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [] CR2: 0000000000000000 CR3: 0000000077320000 CR4: 00000000000006e0 [] Call Trace: [] ? tipc_bcast_rcv+0x9a/0x1a0 [tipc] [] tipc_rcv+0x40d/0x670 [tipc] [] ? _raw_spin_unlock+0xa/0x20 [] tipc_l2_rcv_msg+0x55/0x80 [tipc] [] __netif_receive_skb_one_core+0x8c/0xa0 [] process_backlog+0x98/0x140 [] net_rx_action+0x13a/0x420 [] __do_softirq+0xdb/0x316 [] ? smpboot_thread_fn+0x2f/0x1e0 [] ? smpboot_thread_fn+0x74/0x1e0 [] ? smpboot_thread_fn+0x14e/0x1e0 [] run_ksoftirqd+0x1a/0x40 [] smpboot_thread_fn+0x149/0x1e0 [] ? sort_range+0x20/0x20 [] kthread+0x131/0x150 [] ? kthread_unuse_mm+0xa0/0xa0 [] ret_from_fork+0x22/0x30 [] Modules linked in: veth tipc(O) ip6_udp_tunnel udp_tunnel [...] 
[] CR2: 0000000000000000 [] ---[ end trace 65c276a8e2e2f310 ]--- To fix this, we need to grab the lock of the 'namedq' list on both path calling. Fixes: cad2929dc432 ("tipc: update a binding service via broadcast") Signed-off-by: Hoang Huu Le <hoa...@de...> --- net/tipc/name_distr.c | 10 +++++++++- net/tipc/node.c | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 2f9c148f17e2..fe4edce459ad 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -327,8 +327,13 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, struct tipc_msg *hdr; u16 seqno; + spin_lock_bh(&namedq->lock); skb_queue_walk_safe(namedq, skb, tmp) { - skb_linearize(skb); + if (unlikely(skb_linearize(skb))) { + __skb_unlink(skb, namedq); + kfree_skb(skb); + continue; + } hdr = buf_msg(skb); seqno = msg_named_seqno(hdr); if (msg_is_last_bulk(hdr)) { @@ -338,12 +343,14 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) { __skb_unlink(skb, namedq); + spin_unlock_bh(&namedq->lock); return skb; } if (*open && (*rcv_nxt == seqno)) { (*rcv_nxt)++; __skb_unlink(skb, namedq); + spin_unlock_bh(&namedq->lock); return skb; } @@ -353,6 +360,7 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, continue; } } + spin_unlock_bh(&namedq->lock); return NULL; } diff --git a/net/tipc/node.c b/net/tipc/node.c index cf4b239fc569..d269ebe382e1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1496,7 +1496,7 @@ static void node_lost_contact(struct tipc_node *n, /* Clean up broadcast state */ tipc_bcast_remove_peer(n->net, n->bc_entry.link); - __skb_queue_purge(&n->bc_entry.namedq); + skb_queue_purge(&n->bc_entry.namedq); /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { -- 2.25.1 |
From: Jon M. <jm...@re...> - 2020-10-07 12:48:46
|
On 10/7/20 8:13 AM, Hoang Huu Le wrote: > In the function node_lost_contact(), we call __skb_queue_purge() without > grabbing the list->lock. This can cause to a race-condition why processing > the list 'namedq' in calling path tipc_named_rcv()->tipc_named_dequeue(). > > [] BUG: kernel NULL pointer dereference, address: 0000000000000000 > [] #PF: supervisor read access in kernel mode > [] #PF: error_code(0x0000) - not-present page > [] PGD 7ca63067 P4D 7ca63067 PUD 6c553067 PMD 0 > [] Oops: 0000 [#1] SMP NOPTI > [] CPU: 1 PID: 15 Comm: ksoftirqd/1 Tainted: G O 5.9.0-rc6+ #2 > [] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS [...] > [] RIP: 0010:tipc_named_rcv+0x103/0x320 [tipc] > [] Code: 41 89 44 24 10 49 8b 16 49 8b 46 08 49 c7 06 00 00 00 [...] > [] RSP: 0018:ffffc900000a7c58 EFLAGS: 00000282 > [] RAX: 00000000000012ec RBX: 0000000000000000 RCX: ffff88807bde1270 > [] RDX: 0000000000002c7c RSI: 0000000000002c7c RDI: ffff88807b38f1a8 > [] RBP: ffff88807b006288 R08: ffff88806a367800 R09: ffff88806a367900 > [] R10: ffff88806a367a00 R11: ffff88806a367b00 R12: ffff88807b006258 > [] R13: ffff88807b00628a R14: ffff888069334d00 R15: ffff88806a434600 > [] FS: 0000000000000000(0000) GS:ffff888079480000(0000) knlGS:0[...] > [] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [] CR2: 0000000000000000 CR3: 0000000077320000 CR4: 00000000000006e0 > [] Call Trace: > [] ? tipc_bcast_rcv+0x9a/0x1a0 [tipc] > [] tipc_rcv+0x40d/0x670 [tipc] > [] ? _raw_spin_unlock+0xa/0x20 > [] tipc_l2_rcv_msg+0x55/0x80 [tipc] > [] __netif_receive_skb_one_core+0x8c/0xa0 > [] process_backlog+0x98/0x140 > [] net_rx_action+0x13a/0x420 > [] __do_softirq+0xdb/0x316 > [] ? smpboot_thread_fn+0x2f/0x1e0 > [] ? smpboot_thread_fn+0x74/0x1e0 > [] ? smpboot_thread_fn+0x14e/0x1e0 > [] run_ksoftirqd+0x1a/0x40 > [] smpboot_thread_fn+0x149/0x1e0 > [] ? sort_range+0x20/0x20 > [] kthread+0x131/0x150 > [] ? 
kthread_unuse_mm+0xa0/0xa0 > [] ret_from_fork+0x22/0x30 > [] Modules linked in: veth tipc(O) ip6_udp_tunnel udp_tunnel [...] > [] CR2: 0000000000000000 > [] ---[ end trace 65c276a8e2e2f310 ]--- > > To fix this, we need to grab the lock of the 'namedq' list on both > path calling. > > Fixes: cad2929dc432 ("tipc: update a binding service via broadcast") > Signed-off-by: Hoang Huu Le <hoa...@de...> > --- > net/tipc/name_distr.c | 10 +++++++++- > net/tipc/node.c | 2 +- > 2 files changed, 10 insertions(+), 2 deletions(-) > > diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c > index 2f9c148f17e2..fe4edce459ad 100644 > --- a/net/tipc/name_distr.c > +++ b/net/tipc/name_distr.c > @@ -327,8 +327,13 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, > struct tipc_msg *hdr; > u16 seqno; > > + spin_lock_bh(&namedq->lock); > skb_queue_walk_safe(namedq, skb, tmp) { > - skb_linearize(skb); > + if (unlikely(skb_linearize(skb))) { > + __skb_unlink(skb, namedq); > + kfree_skb(skb); > + continue; > + } > hdr = buf_msg(skb); > seqno = msg_named_seqno(hdr); > if (msg_is_last_bulk(hdr)) { > @@ -338,12 +343,14 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, > > if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) { > __skb_unlink(skb, namedq); > + spin_unlock_bh(&namedq->lock); > return skb; > } > > if (*open && (*rcv_nxt == seqno)) { > (*rcv_nxt)++; > __skb_unlink(skb, namedq); > + spin_unlock_bh(&namedq->lock); > return skb; > } > > @@ -353,6 +360,7 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, > continue; > } > } > + spin_unlock_bh(&namedq->lock); > return NULL; > } > > diff --git a/net/tipc/node.c b/net/tipc/node.c > index cf4b239fc569..d269ebe382e1 100644 > --- a/net/tipc/node.c > +++ b/net/tipc/node.c > @@ -1496,7 +1496,7 @@ static void node_lost_contact(struct tipc_node *n, > > /* Clean up broadcast state */ > tipc_bcast_remove_peer(n->net, n->bc_entry.link); > - __skb_queue_purge(&n->bc_entry.namedq); > + 
skb_queue_purge(&n->bc_entry.namedq); > > /* Abort any ongoing link failover */ > for (i = 0; i < MAX_BEARERS; i++) { Looks ok. Acked-by: jon Maloy <jm...@re...> |
From: Jon M. <jm...@re...> - 2020-10-08 18:01:07
|
On 10/8/20 1:25 PM, Jakub Kicinski wrote: > On Thu, 8 Oct 2020 14:31:56 +0700 Hoang Huu Le wrote: >> diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c >> index 2f9c148f17e2..fe4edce459ad 100644 >> --- a/net/tipc/name_distr.c >> +++ b/net/tipc/name_distr.c >> @@ -327,8 +327,13 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, >> struct tipc_msg *hdr; >> u16 seqno; >> >> + spin_lock_bh(&namedq->lock); >> skb_queue_walk_safe(namedq, skb, tmp) { >> - skb_linearize(skb); >> + if (unlikely(skb_linearize(skb))) { >> + __skb_unlink(skb, namedq); >> + kfree_skb(skb); >> + continue; >> + } >> hdr = buf_msg(skb); >> seqno = msg_named_seqno(hdr); >> if (msg_is_last_bulk(hdr)) { >> @@ -338,12 +343,14 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, >> >> if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) { >> __skb_unlink(skb, namedq); >> + spin_unlock_bh(&namedq->lock); >> return skb; >> } >> >> if (*open && (*rcv_nxt == seqno)) { >> (*rcv_nxt)++; >> __skb_unlink(skb, namedq); >> + spin_unlock_bh(&namedq->lock); >> return skb; >> } >> >> @@ -353,6 +360,7 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, >> continue; >> } >> } >> + spin_unlock_bh(&namedq->lock); >> return NULL; >> } >> >> diff --git a/net/tipc/node.c b/net/tipc/node.c >> index cf4b239fc569..d269ebe382e1 100644 >> --- a/net/tipc/node.c >> +++ b/net/tipc/node.c >> @@ -1496,7 +1496,7 @@ static void node_lost_contact(struct tipc_node *n, >> >> /* Clean up broadcast state */ >> tipc_bcast_remove_peer(n->net, n->bc_entry.link); >> - __skb_queue_purge(&n->bc_entry.namedq); >> + skb_queue_purge(&n->bc_entry.namedq); > Patch looks fine, but I'm not sure why not hold > spin_unlock_bh(&tn->nametbl_lock) here instead? > > Seems like node_lost_contact() should be relatively rare, > so adding another lock to tipc_named_dequeue() is not the > right trade off. Actually, I agree with previous speaker here. 
We already have the nametbl_lock when tipc_named_dequeue() is called, and the same lock is accessible from node.c where node_lost_contact() is executed. The patch and the code become simpler. I suggest you post a v2 of this one. ///jon >> /* Abort any ongoing link failover */ >> for (i = 0; i < MAX_BEARERS; i++) { |
From: Hoang H. Le <hoa...@de...> - 2020-10-09 04:13:01
|
Hi Jon, Jakub, I tried with your comment. But looks like we got into circular locking and deadlock could happen like this: CPU0 CPU1 ---- ---- lock(&n->lock#2); lock(&tn->nametbl_lock); lock(&n->lock#2); lock(&tn->nametbl_lock); *** DEADLOCK *** Regards, Hoang > -----Original Message----- > From: Jon Maloy <jm...@re...> > Sent: Friday, October 9, 2020 1:01 AM > To: Jakub Kicinski <ku...@ke...>; Hoang Huu Le <hoa...@de...> > Cc: ma...@do...; yin...@wi...; tip...@li...; ne...@vg... > Subject: Re: [net] tipc: fix NULL pointer dereference in tipc_named_rcv > > > > On 10/8/20 1:25 PM, Jakub Kicinski wrote: > > On Thu, 8 Oct 2020 14:31:56 +0700 Hoang Huu Le wrote: > >> diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c > >> index 2f9c148f17e2..fe4edce459ad 100644 > >> --- a/net/tipc/name_distr.c > >> +++ b/net/tipc/name_distr.c > >> @@ -327,8 +327,13 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, > >> struct tipc_msg *hdr; > >> u16 seqno; > >> > >> + spin_lock_bh(&namedq->lock); > >> skb_queue_walk_safe(namedq, skb, tmp) { > >> - skb_linearize(skb); > >> + if (unlikely(skb_linearize(skb))) { > >> + __skb_unlink(skb, namedq); > >> + kfree_skb(skb); > >> + continue; > >> + } > >> hdr = buf_msg(skb); > >> seqno = msg_named_seqno(hdr); > >> if (msg_is_last_bulk(hdr)) { > >> @@ -338,12 +343,14 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, > >> > >> if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) { > >> __skb_unlink(skb, namedq); > >> + spin_unlock_bh(&namedq->lock); > >> return skb; > >> } > >> > >> if (*open && (*rcv_nxt == seqno)) { > >> (*rcv_nxt)++; > >> __skb_unlink(skb, namedq); > >> + spin_unlock_bh(&namedq->lock); > >> return skb; > >> } > >> > >> @@ -353,6 +360,7 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, > >> continue; > >> } > >> } > >> + spin_unlock_bh(&namedq->lock); > >> return NULL; > >> } > >> > >> diff --git a/net/tipc/node.c b/net/tipc/node.c > >> index 
cf4b239fc569..d269ebe382e1 100644 > >> --- a/net/tipc/node.c > >> +++ b/net/tipc/node.c > >> @@ -1496,7 +1496,7 @@ static void node_lost_contact(struct tipc_node *n, > >> > >> /* Clean up broadcast state */ > >> tipc_bcast_remove_peer(n->net, n->bc_entry.link); > >> - __skb_queue_purge(&n->bc_entry.namedq); > >> + skb_queue_purge(&n->bc_entry.namedq); > > Patch looks fine, but I'm not sure why not hold > > spin_unlock_bh(&tn->nametbl_lock) here instead? > > > > Seems like node_lost_contact() should be relatively rare, > > so adding another lock to tipc_named_dequeue() is not the > > right trade off. > Actually, I agree with previous speaker here. We already have the > nametbl_lock when tipc_named_dequeue() is called, and the same lock is > accessible from no.c where node_lost_contact() is executed. The patch > and the code becomes simpler. > I suggest you post a v2 of this one. > > ///jon > > >> /* Abort any ongoing link failover */ > >> for (i = 0; i < MAX_BEARERS; i++) { |
From: Jon M. <jm...@re...> - 2020-10-09 12:48:17
|
On 10/9/20 12:12 AM, Hoang Huu Le wrote: > Hi Jon, Jakub, > > I tried with your comment. But looks like we got into circular locking and deadlock could happen like this: > CPU0 CPU1 > ---- ---- > lock(&n->lock#2); > lock(&tn->nametbl_lock); > lock(&n->lock#2); > lock(&tn->nametbl_lock); > > *** DEADLOCK *** > > Regards, > Hoang Ok. So although your solution is not optimal, we know it is safe. Again: Acked-by: Jon Maloy <jm...@re...> >> -----Original Message----- >> From: Jon Maloy <jm...@re...> >> Sent: Friday, October 9, 2020 1:01 AM >> To: Jakub Kicinski <ku...@ke...>; Hoang Huu Le <hoa...@de...> >> Cc: ma...@do...; yin...@wi...; tip...@li...; ne...@vg... >> Subject: Re: [net] tipc: fix NULL pointer dereference in tipc_named_rcv >> >> >> >> On 10/8/20 1:25 PM, Jakub Kicinski wrote: >>> On Thu, 8 Oct 2020 14:31:56 +0700 Hoang Huu Le wrote: >>>> diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c >>>> index 2f9c148f17e2..fe4edce459ad 100644 >>>> --- a/net/tipc/name_distr.c >>>> +++ b/net/tipc/name_distr.c >>>> @@ -327,8 +327,13 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, >>>> struct tipc_msg *hdr; >>>> u16 seqno; >>>> >>>> + spin_lock_bh(&namedq->lock); >>>> skb_queue_walk_safe(namedq, skb, tmp) { >>>> - skb_linearize(skb); >>>> + if (unlikely(skb_linearize(skb))) { >>>> + __skb_unlink(skb, namedq); >>>> + kfree_skb(skb); >>>> + continue; >>>> + } >>>> hdr = buf_msg(skb); >>>> seqno = msg_named_seqno(hdr); >>>> if (msg_is_last_bulk(hdr)) { >>>> @@ -338,12 +343,14 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq, >>>> >>>> if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) { >>>> __skb_unlink(skb, namedq); >>>> + spin_unlock_bh(&namedq->lock); >>>> return skb; >>>> } >>>> >>>> if (*open && (*rcv_nxt == seqno)) { >>>> (*rcv_nxt)++; >>>> __skb_unlink(skb, namedq); >>>> + spin_unlock_bh(&namedq->lock); >>>> return skb; >>>> } >>>> >>>> @@ -353,6 +360,7 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head 
*namedq, >>>> continue; >>>> } >>>> } >>>> + spin_unlock_bh(&namedq->lock); >>>> return NULL; >>>> } >>>> >>>> diff --git a/net/tipc/node.c b/net/tipc/node.c >>>> index cf4b239fc569..d269ebe382e1 100644 >>>> --- a/net/tipc/node.c >>>> +++ b/net/tipc/node.c >>>> @@ -1496,7 +1496,7 @@ static void node_lost_contact(struct tipc_node *n, >>>> >>>> /* Clean up broadcast state */ >>>> tipc_bcast_remove_peer(n->net, n->bc_entry.link); >>>> - __skb_queue_purge(&n->bc_entry.namedq); >>>> + skb_queue_purge(&n->bc_entry.namedq); >>> Patch looks fine, but I'm not sure why not hold >>> spin_unlock_bh(&tn->nametbl_lock) here instead? >>> >>> Seems like node_lost_contact() should be relatively rare, >>> so adding another lock to tipc_named_dequeue() is not the >>> right trade off. >> Actually, I agree with previous speaker here. We already have the >> nametbl_lock when tipc_named_dequeue() is called, and the same lock is >> accessible from no.c where node_lost_contact() is executed. The patch >> and the code becomes simpler. >> I suggest you post a v2 of this one. >> >> ///jon >> >>>> /* Abort any ongoing link failover */ >>>> for (i = 0; i < MAX_BEARERS; i++) { |