|
From: Xin L. <luc...@gm...> - 2021-05-14 15:42:42
|
On Thu, May 13, 2021 at 5:15 PM Jon Maloy <jm...@re...> wrote:
>
>
>
> On 4/28/21 3:30 PM, Xin Long wrote:
> > After commit cb1b728096f5 ("tipc: eliminate race condition at multicast
> > reception"), when processing the multicast reception, the packets are
> > firstly moved from be->inputq1 to be->arrvq in tipc_node_broadcast(),
> > then process be->arrvq in tipc_sk_mcast_rcv().
> >
> > In tipc_sk_mcast_rcv(), it gets the 1st skb by skb_peek(), then process
> > this skb without any lock. It means meanwhile another thread could also
> > call tipc_sk_mcast_rcv() and process be->arrvq and pick up the same skb,
> > then free it. A double free issue will be caused as Li Shuang reported:
> >
> > [] kernel BUG at mm/slub.c:305!
> > [] kfree+0x3a7/0x3d0
> > [] kfree_skb+0x32/0xa0
> > [] skb_release_data+0xb4/0x170
> > [] kfree_skb+0x32/0xa0
> > [] skb_release_data+0xb4/0x170
> > [] kfree_skb+0x32/0xa0
> > [] tipc_sk_mcast_rcv+0x1fa/0x380 [tipc]
> > [] tipc_rcv+0x411/0x1120 [tipc]
> > [] tipc_udp_recv+0xc6/0x1e0 [tipc]
> > [] udp_queue_rcv_one_skb+0x1a9/0x500
> > [] udp_unicast_rcv_skb.isra.66+0x75/0x90
> > [] __udp4_lib_rcv+0x537/0xc40
> > [] ip_protocol_deliver_rcu+0xdf/0x1d0
> > [] ip_local_deliver_finish+0x4a/0x50
> > [] ip_local_deliver+0x6b/0xe0
> > [] ip_rcv+0x27b/0x36a
> > [] __netif_receive_skb_core+0xb47/0xc40
> > [] process_backlog+0xae/0x160
> >
> > Commit 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
> > tried to fix this double free by not releasing the skbs in be->arrvq,
> > which would definitely cause the skbs to leak.
> >
> > The problem is that we shouldn't process the skbs in be->arrvq without
> > any lock protecting the window between peeking and dequeuing them. The
> > fix here is to use a temp skb list instead of be->arrvq to make the
> > processing "per thread safe". While at it, remove the no-longer-used
> > be->arrvq.
> >
> > Fixes: cb1b728096f5 ("tipc: eliminate race condition at multicast reception")
> > Fixes: 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
> > Reported-by: Li Shuang <sh...@re...>
> > Signed-off-by: Xin Long <luc...@gm...>
> > ---
> > net/tipc/node.c | 9 ++++-----
> > net/tipc/socket.c | 16 +++-------------
> > 2 files changed, 7 insertions(+), 18 deletions(-)
> >
> > diff --git a/net/tipc/node.c b/net/tipc/node.c
> > index e0ee832..0c636fb 100644
> > --- a/net/tipc/node.c
> > +++ b/net/tipc/node.c
> > @@ -72,7 +72,6 @@ struct tipc_link_entry {
> > struct tipc_bclink_entry {
> > struct tipc_link *link;
> > struct sk_buff_head inputq1;
> > - struct sk_buff_head arrvq;
> > struct sk_buff_head inputq2;
> > struct sk_buff_head namedq;
> > u16 named_rcv_nxt;
> > @@ -552,7 +551,6 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id,
> > INIT_LIST_HEAD(&n->conn_sks);
> > skb_queue_head_init(&n->bc_entry.namedq);
> > skb_queue_head_init(&n->bc_entry.inputq1);
> > - __skb_queue_head_init(&n->bc_entry.arrvq);
> > skb_queue_head_init(&n->bc_entry.inputq2);
> > for (i = 0; i < MAX_BEARERS; i++)
> > spin_lock_init(&n->links[i].lock);
> > @@ -1803,14 +1801,15 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb, int rc_dests)
> > static void tipc_node_mcast_rcv(struct tipc_node *n)
> > {
> > struct tipc_bclink_entry *be = &n->bc_entry;
> > + struct sk_buff_head tmpq;
> >
> > - /* 'arrvq' is under inputq2's lock protection */
> > + __skb_queue_head_init(&tmpq);
> > spin_lock_bh(&be->inputq2.lock);
> > spin_lock_bh(&be->inputq1.lock);
> > - skb_queue_splice_tail_init(&be->inputq1, &be->arrvq);
> > + skb_queue_splice_tail_init(&be->inputq1, &tmpq);
> > spin_unlock_bh(&be->inputq1.lock);
> > spin_unlock_bh(&be->inputq2.lock);
> > - tipc_sk_mcast_rcv(n->net, &be->arrvq, &be->inputq2);
> > + tipc_sk_mcast_rcv(n->net, &tmpq, &be->inputq2);
> > }
> >
> > static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
> > diff --git a/net/tipc/socket.c b/net/tipc/socket.c
> > index 022999e..2870798 100644
> > --- a/net/tipc/socket.c
> > +++ b/net/tipc/socket.c
> > @@ -1210,8 +1210,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
> > __skb_queue_head_init(&tmpq);
> > INIT_LIST_HEAD(&dports);
> >
> > - skb = tipc_skb_peek(arrvq, &inputq->lock);
> > - for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
> > + while ((skb = __skb_dequeue(arrvq)) != NULL) {
> > hdr = buf_msg(skb);
> > user = msg_user(hdr);
> > mtyp = msg_type(hdr);
> > @@ -1220,13 +1219,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
> > type = msg_nametype(hdr);
> >
> > if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
> > - spin_lock_bh(&inputq->lock);
> > - if (skb_peek(arrvq) == skb) {
> > - __skb_dequeue(arrvq);
> > - __skb_queue_tail(inputq, skb);
> > - }
> > - kfree_skb(skb);
> > - spin_unlock_bh(&inputq->lock);
> > + skb_queue_tail(inputq, skb);
> > continue;
> > }
> >
> > @@ -1263,10 +1256,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
> > }
> > /* Append to inputq if not already done by other thread */
> > spin_lock_bh(&inputq->lock);
> > - if (skb_peek(arrvq) == skb) {
> > - skb_queue_splice_tail_init(&tmpq, inputq);
> > - __skb_dequeue(arrvq);
> > - }
> > + skb_queue_splice_tail_init(&tmpq, inputq);
> > spin_unlock_bh(&inputq->lock);
> > __skb_queue_purge(&tmpq);
> > kfree_skb(skb);
> Nack.
>
> This would invalidate the sequence guarantee of messages between two
> specific sockets.
> The whole point of having a lock protected arrival queue is to preserve
> the order when messages are moved from inputq1 to inputq2.
> Let's take a discussion on our mailing list.
>
Hi, Jon, thanks for checking this.
I'm making this tipc-discussion only.
The problem you're describing exists even without this patch,
unless we hold the lock until the dequeued skb has entered the sk's
receive queue, something like:
lock()
skb=dequeue(arrv)
...
tipc_sk_rcv(skb)
unlock()
That's also what other protocols do, and the downside is less
parallel processing.
|