From: Hoang L. <hoa...@de...> - 2019-11-14 08:09:30
|
Hi Jon, Please take a look at v2. The mechanism looks the same as I did before in commit: c55c8edafa91 ("tipc: smooth change between replicast and broadcast") However, in this case we handle only one direction: replicast -> broadcast. Then, it is still backward compatible. [...] >From ae2ee6a7064de3ec1dc2c7df2db241d22b0d129f Mon Sep 17 00:00:00 2001 From: Hoang Le <hoa...@de...> Date: Wed, 13 Nov 2019 14:01:03 +0700 Subject: [PATCH] tipc: update a binding service via broadcast Currently, updating the binding table (adding a service binding to the name table/withdrawing a service binding) is being sent over replicast. However, if we are scaling up clusters to > 100 nodes/containers this method is less efficient because of looping through the nodes in a cluster one by one. It is worthwhile to use broadcast to update a binding service. Then the binding table is updated in all nodes in one shot. The mechanism is backward compatible because only the sending side changes. v2: resolve synchronization problem when switching from unicast to broadcast Signed-off-by: Hoang Le <hoa...@de...> --- net/tipc/bcast.c | 13 +++++++++++++ net/tipc/bcast.h | 2 ++ net/tipc/link.c | 16 ++++++++++++++++ net/tipc/name_distr.c | 8 ++++++++ net/tipc/name_table.c | 9 ++++++--- 5 files changed, 45 insertions(+), 3 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index f41096a759fa..18431fa897ab 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, __skb_queue_tail(inputq, _skb); } } + +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb) +{ + struct sk_buff_head xmitq; + u16 cong_link_cnt; + int rc = 0; + + __skb_queue_head_init(&xmitq); + __skb_queue_tail(&xmitq, skb); + rc = tipc_bcast_xmit(net, &xmitq, &cong_link_cnt); + __skb_queue_purge(&xmitq); + return rc; +} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index dadad953e2be..a100da3800fc 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -101,6 
+101,8 @@ int tipc_bclink_reset_stats(struct net *net); u32 tipc_bcast_get_broadcast_mode(struct net *net); u32 tipc_bcast_get_broadcast_ratio(struct net *net); +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb); + void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, struct sk_buff_head *inputq); diff --git a/net/tipc/link.c b/net/tipc/link.c index fb72031228c9..22f1854435df 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -187,6 +187,9 @@ struct tipc_link { struct tipc_link *bc_sndlink; u8 nack_state; bool bc_peer_is_up; + bool named_sync; + struct sk_buff_head defer_namedq; + /* Statistics */ struct tipc_stats stats; @@ -363,6 +366,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l, trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!"); tipc_link_reset(rcv_l); rcv_l->state = LINK_RESET; + rcv_l->named_sync = false; if (!snd_l->ackers) { trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!"); tipc_link_reset(snd_l); @@ -508,6 +512,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, __skb_queue_head_init(&l->failover_deferdq); skb_queue_head_init(&l->wakeupq); skb_queue_head_init(l->inputq); + __skb_queue_head_init(&l->defer_namedq); return true; } @@ -932,6 +937,8 @@ void tipc_link_reset(struct tipc_link *l) l->silent_intv_cnt = 0; l->rst_cnt = 0; l->bc_peer_is_up = false; + l->named_sync = false; + __skb_queue_purge(&l->defer_namedq); memset(&l->mon_state, 0, sizeof(l->mon_state)); tipc_link_reset_stats(l); } @@ -1210,6 +1217,15 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, return true; case NAME_DISTRIBUTOR: l->bc_rcvlink->state = LINK_ESTABLISHED; + if (msg_is_syn(hdr)) { + l->bc_rcvlink->named_sync = true; + skb_queue_splice_tail_init(&l->defer_namedq, l->namedq); + return true; + } + if (msg_is_rcast(hdr) && !l->bc_rcvlink->named_sync) { + skb_queue_tail(&l->defer_namedq, skb); + return true; + } skb_queue_tail(l->namedq, skb); return true; case 
MSG_BUNDLER: diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 5feaf3b67380..419b3f0f102d 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -180,6 +180,14 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem)); __skb_queue_tail(list, skb); } + + /* Allocate dummy message in order to synchronize w/bcast */ + skb = named_prepare_buf(net, PUBLICATION, 0, dnode); + if (skb) { + /* Preparing for 'synching' header */ + msg_set_syn(buf_msg(skb), 1); + __skb_queue_tail(list, skb); + } } /** diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 66a65c2cdb23..4ba6d73e5c4c 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -632,8 +632,10 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, exit: spin_unlock_bh(&tn->nametbl_lock); - if (skb) - tipc_node_broadcast(net, skb); + if (skb) { + msg_set_is_rcast(buf_msg(skb), true); + tipc_bcast_named_publish(net, skb); + } return p; } @@ -664,7 +666,8 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, spin_unlock_bh(&tn->nametbl_lock); if (skb) { - tipc_node_broadcast(net, skb); + msg_set_is_rcast(buf_msg(skb), true); + tipc_bcast_named_publish(net, skb); return 1; } return 0; -- 2.20.1 -----Original Message----- From: Jon Maloy <jon...@er...> Sent: Wednesday, November 13, 2019 7:02 PM To: Hoang Huu Le <hoa...@de...>; ma...@do...; tip...@li... Cc: 'Ying Xue' <yin...@wi...> Subject: RE: [net-next] tipc: update a binding service via broadcast Hi Hoang, This is good, but you have missed the point about the synchronization problem I have been talking about. 1) A new node comes up 2) The "bulk" binding table update is sent, as a series of packets over the new unicast link. This may take some time. 3) The owner of one of the bindings in the bulk (on this node) does unbind. 
4) This is sent as broadcast withdraw to all nodes, and arrives before the last packets of the unicast bulk to the newly connected node. 5) Since there is no corresponding publication in the peer node's binding table yet, the withdraw is ignored. 6) The last bulk unicasts arrive at the new peer, and the now invalid publication is added to its binding table. 7) This publication will stay there forever. We need to find a way to synchronize so that we know that all the bulk publications are in place in the binding table before any broadcast publications/withdraws can be accepted. Obviously, we could create a backlog queue in the name table, but I hope we can find a simpler and neater solution. Regards ///jon > -----Original Message----- > From: Hoang Le <hoa...@de...> > Sent: 13-Nov-19 02:35 > To: Jon Maloy <jon...@er...>; ma...@do...; tip...@li... > Subject: [net-next] tipc: update a binding service via broadcast > > Currently, updating binding table (add service binding to > name table/withdraw a service binding) is being sent over replicast. > However, if we are scaling up clusters to > 100 nodes/containers this > method is less affection because of looping through nodes in a cluster one > by one. > > It is worth to use broadcast to update a binding service. Then binding > table updates in all nodes for one shot. > > The mechanism is backward compatible because of sending side changing. 
> > Signed-off-by: Hoang Le <hoa...@de...> > --- > net/tipc/bcast.c | 13 +++++++++++++ > net/tipc/bcast.h | 2 ++ > net/tipc/name_table.c | 4 ++-- > 3 files changed, 17 insertions(+), 2 deletions(-) > > diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c > index f41096a759fa..18431fa897ab 100644 > --- a/net/tipc/bcast.c > +++ b/net/tipc/bcast.c > @@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, > __skb_queue_tail(inputq, _skb); > } > } > + > +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb) > +{ > + struct sk_buff_head xmitq; > + u16 cong_link_cnt; > + int rc = 0; > + > + __skb_queue_head_init(&xmitq); > + __skb_queue_tail(&xmitq, skb); > + rc = tipc_bcast_xmit(net, &xmitq, &cong_link_cnt); > + __skb_queue_purge(&xmitq); > + return rc; > +} > diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h > index dadad953e2be..a100da3800fc 100644 > --- a/net/tipc/bcast.h > +++ b/net/tipc/bcast.h > @@ -101,6 +101,8 @@ int tipc_bclink_reset_stats(struct net *net); > u32 tipc_bcast_get_broadcast_mode(struct net *net); > u32 tipc_bcast_get_broadcast_ratio(struct net *net); > > +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb); > + > void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, > struct sk_buff_head *inputq); > > diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c > index 66a65c2cdb23..9e9c61f7c999 100644 > --- a/net/tipc/name_table.c > +++ b/net/tipc/name_table.c > @@ -633,7 +633,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, > spin_unlock_bh(&tn->nametbl_lock); > > if (skb) > - tipc_node_broadcast(net, skb); > + tipc_bcast_named_publish(net, skb); > return p; > } > > @@ -664,7 +664,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, > spin_unlock_bh(&tn->nametbl_lock); > > if (skb) { > - tipc_node_broadcast(net, skb); > + tipc_bcast_named_publish(net, skb); > return 1; > } > return 0; > -- > 2.20.1 |