From: Tuong L. T. <tuo...@de...> - 2019-11-14 02:03:22

Thanks Dan,

This is however expected: the function returns early at line 1727, i.e. the
'-ENOKEY' case, so 'aead' is never dereferenced inside
tipc_crypto_rcv_complete()!

BR/Tuong

-----Original Message-----
From: Dan Carpenter <dan...@or...>
Sent: Thursday, November 14, 2019 1:33 AM
To: tuo...@de...
Cc: tip...@li...
Subject: [bug report] tipc: introduce TIPC encryption & authentication

Hello Tuong Lien,

This is a semi-automatic email about new static checker warnings.

The patch fc1b6d6de220: "tipc: introduce TIPC encryption & authentication"
from Nov 8, 2019, leads to the following Smatch complaint:

net/tipc/crypto.c:1734 tipc_crypto_rcv()
	error: we previously assumed 'aead' could be null (see line 1697)

net/tipc/crypto.c
  1696		aead = tipc_crypto_key_pick_tx(tx, rx, *skb);
  1697		if (aead)
  1698			goto decrypt;
  1699		goto exit;
		^^^^^^^^^^
"aead" is NULL here.

  1700
  1701	decrypt:
  1702		rcu_read_lock();
  1703		if (!aead)
  1704			aead = tipc_aead_get(rx->aead[tx_key]);
  1705		rc = tipc_aead_decrypt(net, aead, *skb, b);
  1706		rcu_read_unlock();
  1707
  1708	exit:
  1709		stats = ((rx) ?: tx)->stats;
  1710		switch (rc) {
  1711		case 0:
  1712			this_cpu_inc(stats->stat[STAT_OK]);
  1713			break;
  1714		case -EINPROGRESS:
  1715		case -EBUSY:
  1716			this_cpu_inc(stats->stat[STAT_ASYNC]);
  1717			*skb = NULL;
  1718			return rc;
  1719		default:
  1720			this_cpu_inc(stats->stat[STAT_NOK]);
  1721			if (rc == -ENOKEY) {
  1722				kfree_skb(*skb);
  1723				*skb = NULL;
  1724				if (rx)
  1725					tipc_node_put(rx->node);
  1726				this_cpu_inc(stats->stat[STAT_NOKEYS]);
  1727				return rc;
  1728			} else if (rc == -EBADMSG) {
  1729				this_cpu_inc(stats->stat[STAT_BADMSGS]);
  1730			}
  1731			break;
  1732		}
  1733
  1734		tipc_crypto_rcv_complete(net, aead, b, skb, rc);
		^^^^
Dereferenced inside function.

  1735		return rc;
  1736	}

regards,
dan carpenter
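Why the warning is a false positive can be reduced to the following control-flow skeleton; this is an illustrative reduction with made-up helper names, not the actual kernel code:

#include <errno.h>

struct tipc_aead;

static int decrypt(struct tipc_aead *aead) { (void)aead; return 0; }
static void rcv_complete(struct tipc_aead *aead) { (void)aead; }

/* Reduction of the tipc_crypto_rcv() flow discussed above: the only
 * path that leaves 'aead' NULL also leaves rc == -ENOKEY, and the
 * -ENOKEY case always returns before the complete step is reached.
 */
static int rcv_skeleton(struct tipc_aead *aead)
{
	int rc = -ENOKEY;

	if (aead)
		rc = decrypt(aead);	/* only path that can clear -ENOKEY */
	if (rc == -ENOKEY)
		return rc;		/* the aead == NULL case exits here */
	rcv_complete(aead);		/* hence aead != NULL when this runs */
	return rc;
}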
From: Jon M. <jon...@er...> - 2019-11-14 00:17:39

Acked-by: Jon Maloy <jon...@er...>

> -----Original Message-----
> From: net...@vg... <net...@vg...> On Behalf Of Matt Bennett
> Sent: 13-Nov-19 18:20
> To: Jon Maloy <jon...@er...>; yin...@wi...; da...@da...;
> ne...@vg...; tip...@li...
> Cc: lin...@vg...; Matt Bennett <mat...@al...>
> Subject: [PATCH] tipc: add back tipc prefix to log messages
>
> The tipc prefix for log messages generated by tipc was
> removed in commit 07f6c4bc048a ("tipc: convert tipc reference
> table to use generic rhashtable").
>
> This is still a useful prefix so add it back.
>
> Signed-off-by: Matt Bennett <mat...@al...>
> ---
>  net/tipc/core.c | 2 --
>  net/tipc/core.h | 6 ++++++
>  2 files changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/net/tipc/core.c b/net/tipc/core.c
> index 23cb379a93d6..8f35060a24e1 100644
> --- a/net/tipc/core.c
> +++ b/net/tipc/core.c
> @@ -34,8 +34,6 @@
>   * POSSIBILITY OF SUCH DAMAGE.
>   */
>
> -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> -
>  #include "core.h"
>  #include "name_table.h"
>  #include "subscr.h"
> diff --git a/net/tipc/core.h b/net/tipc/core.h
> index 60d829581068..3042f654e0af 100644
> --- a/net/tipc/core.h
> +++ b/net/tipc/core.h
> @@ -60,6 +60,12 @@
>  #include <linux/rhashtable.h>
>  #include <net/genetlink.h>
>
> +#ifdef pr_fmt
> +#undef pr_fmt
> +#endif
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
>  struct tipc_node;
>  struct tipc_bearer;
>  struct tipc_bc_base;
> --
> 2.24.0
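For context, pr_fmt() is the standard kernel mechanism behind this patch: every pr_*() helper expands its format string through pr_fmt(), so defining the macro once in core.h prefixes all TIPC log lines. A minimal userspace mock of the mechanism (the log text is illustrative; KBUILD_MODNAME is normally supplied by Kbuild):

#include <stdio.h>

#define KBUILD_MODNAME "tipc"
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
/* mock of the kernel's pr_warn(): prepend pr_fmt() at expansion time */
#define pr_warn(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	pr_warn("Resetting bearer <%s>\n", "eth:eth0");
	/* prints: tipc: Resetting bearer <eth:eth0> */
	return 0;
}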
From: Jon M. <jon...@er...> - 2019-11-13 14:06:16

Good observation. However, this re-introduces a problem I discovered and fixed. When a socket is being shut down, and may be in DISCONNECTING state, there may still be messages in the write queue, and those must be pushed out before we can delete the socket, otherwise the connection hasn't finished its task. I noticed this because the benchmark program didn't finish until I had fixed this.

I think the safest solution is to actually peek the first message in the queue and return if that is a SYN message. That way, we could also eliminate the corresponding test I do in __tipc_shutdown(), and just call push_backlog() unconditionally there.

///jon

> -----Original Message-----
> From: Tung Nguyen <tun...@de...>
> Sent: 13-Nov-19 07:32
> To: tip...@li...; Jon Maloy <jon...@er...>; ma...@do...;
> yin...@wi...
> Subject: [tipc-discussion] [net v1 1/1] tipc: fix duplicate SYN messages under link congestion
>
> Scenario:
> 1. A client socket initiates a SYN message to a listening socket.
> 2. The send link is congested, the SYN message is put in the
>    send link and a wakeup message is put in the wakeup queue.
> 3. The congestion situation is abated, the wakeup message is
>    pulled out of the wakeup queue. Function tipc_sk_push_backlog()
>    is called to send out delayed messages by Nagle. However,
>    the client socket is still in CONNECTING state. So, it sends
>    the SYN message in the socket write queue to the listening socket
>    again.
> 4. The listening socket receives the first SYN message and creates
>    the first server socket. The client socket receives ACK and
>    establishes a connection to the first server socket. The client
>    socket then closes its connection with the first server socket.
> 5. The listening socket receives the second SYN message and creates
>    a second server socket. The second server socket sends ACK to the
>    client socket, but it has been closed. This results in a
>    "connection refused" error when reading from the server socket in
>    user space.
>
> Solution: return from function tipc_sk_push_backlog() immediately
> if the client socket state is not ESTABLISHED.
>
> Fixes: c0bceb97db9e ("tipc: add smart nagle feature")
> Signed-off-by: Tung Nguyen <tun...@de...>
> ---
>  net/tipc/socket.c | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/net/tipc/socket.c b/net/tipc/socket.c
> index 5d7859a..61f9da4 100644
> --- a/net/tipc/socket.c
> +++ b/net/tipc/socket.c
> @@ -1246,13 +1246,18 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
>  static void tipc_sk_push_backlog(struct tipc_sock *tsk)
>  {
>  	struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
> -	struct net *net = sock_net(&tsk->sk);
> +	struct sock *sk = &tsk->sk;
> +	struct net *net = sock_net(sk);
>  	u32 dnode = tsk_peer_node(tsk);
>  	int rc;
>
>  	if (skb_queue_empty(txq) || tsk->cong_link_cnt)
>  		return;
>
> +	/* Do not send SYN again after congestion */
> +	if (sk->sk_state != TIPC_ESTABLISHED)
> +		return;
> +
>  	tsk->snt_unacked += tsk->snd_backlog;
>  	tsk->snd_backlog = 0;
>  	tsk->expect_ack = true;
> --
> 2.1.4
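A rough sketch of the "peek" variant Jon proposes above, assuming the msg_is_syn() helper from net/tipc/msg.h can be used here; untested, for discussion only:

static void tipc_sk_push_backlog(struct tipc_sock *tsk)
{
	struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
	struct sk_buff *skb = skb_peek(txq);

	/* Never push a queued SYN from here; connection setup messages
	 * are sent out by the connect/shutdown paths only
	 */
	if (!skb || tsk->cong_link_cnt || msg_is_syn(buf_msg(skb)))
		return;

	/* ... continue as in the patch above: update snt_unacked,
	 * snd_backlog, expect_ack and transmit via tipc_node_xmit() ...
	 */
}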
From: Tung N. <tun...@de...> - 2019-11-13 12:32:31

Scenario:
1. A client socket initiates a SYN message to a listening socket.
2. The send link is congested, the SYN message is put in the
   send link and a wakeup message is put in the wakeup queue.
3. The congestion situation is abated, the wakeup message is
   pulled out of the wakeup queue. Function tipc_sk_push_backlog()
   is called to send out delayed messages by Nagle. However,
   the client socket is still in CONNECTING state. So, it sends
   the SYN message in the socket write queue to the listening socket
   again.
4. The listening socket receives the first SYN message and creates
   the first server socket. The client socket receives ACK and
   establishes a connection to the first server socket. The client
   socket then closes its connection with the first server socket.
5. The listening socket receives the second SYN message and creates
   a second server socket. The second server socket sends ACK to the
   client socket, but it has been closed. This results in a
   "connection refused" error when reading from the server socket in
   user space.

Solution: return from function tipc_sk_push_backlog() immediately
if the client socket state is not ESTABLISHED.

Fixes: c0bceb97db9e ("tipc: add smart nagle feature")
Signed-off-by: Tung Nguyen <tun...@de...>
---
 net/tipc/socket.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 5d7859a..61f9da4 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1246,13 +1246,18 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 static void tipc_sk_push_backlog(struct tipc_sock *tsk)
 {
 	struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
-	struct net *net = sock_net(&tsk->sk);
+	struct sock *sk = &tsk->sk;
+	struct net *net = sock_net(sk);
 	u32 dnode = tsk_peer_node(tsk);
 	int rc;

 	if (skb_queue_empty(txq) || tsk->cong_link_cnt)
 		return;

+	/* Do not send SYN again after congestion */
+	if (sk->sk_state != TIPC_ESTABLISHED)
+		return;
+
 	tsk->snt_unacked += tsk->snd_backlog;
 	tsk->snd_backlog = 0;
 	tsk->expect_ack = true;
--
2.1.4
From: Jon M. <jon...@er...> - 2019-11-13 12:02:19

Hi Hoang,

This is good, but you have missed the point about the synchronization problem I have been talking about.

1) A new node comes up.
2) The "bulk" binding table update is sent, as a series of packets over the new unicast link. This may take some time.
3) The owner of one of the bindings in the bulk (on this node) does unbind.
4) This is sent as a broadcast withdraw to all nodes, and arrives before the last packets of the unicast bulk to the newly connected node.
5) Since there is no corresponding publication in the peer node's binding table yet, the withdraw is ignored.
6) The last bulk unicasts arrive at the new peer, and the now invalid publication is added to its binding table.
7) This publication will stay there forever.

We need to find a way to synchronize so that we know that all the bulk publications are in place in the binding table before any broadcast publications/withdraws can be accepted. Obviously, we could create a backlog queue in the name table, but I hope we can find a simpler and neater solution.

Regards
///jon

> -----Original Message-----
> From: Hoang Le <hoa...@de...>
> Sent: 13-Nov-19 02:35
> To: Jon Maloy <jon...@er...>; ma...@do...; tip...@li...
> Subject: [net-next] tipc: update a binding service via broadcast
>
> Currently, updating the binding table (adding a service binding to the
> name table/withdrawing a service binding) is sent over replicast.
> However, if we are scaling up clusters to > 100 nodes/containers this
> method is less efficient because of looping through the nodes in a
> cluster one by one.
>
> It is worth using broadcast to update a binding service. The binding
> table then updates in all nodes in one shot.
>
> The mechanism is backward compatible because only the sending side changes.
>
> Signed-off-by: Hoang Le <hoa...@de...>
> ---
>  net/tipc/bcast.c      | 13 +++++++++++++
>  net/tipc/bcast.h      |  2 ++
>  net/tipc/name_table.c |  4 ++--
>  3 files changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
> index f41096a759fa..18431fa897ab 100644
> --- a/net/tipc/bcast.c
> +++ b/net/tipc/bcast.c
> @@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
>  		__skb_queue_tail(inputq, _skb);
>  	}
>  }
> +
> +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb)
> +{
> +	struct sk_buff_head xmitq;
> +	u16 cong_link_cnt;
> +	int rc = 0;
> +
> +	__skb_queue_head_init(&xmitq);
> +	__skb_queue_tail(&xmitq, skb);
> +	rc = tipc_bcast_xmit(net, &xmitq, &cong_link_cnt);
> +	__skb_queue_purge(&xmitq);
> +	return rc;
> +}
> diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
> index dadad953e2be..a100da3800fc 100644
> --- a/net/tipc/bcast.h
> +++ b/net/tipc/bcast.h
> @@ -101,6 +101,8 @@ int tipc_bclink_reset_stats(struct net *net);
>  u32 tipc_bcast_get_broadcast_mode(struct net *net);
>  u32 tipc_bcast_get_broadcast_ratio(struct net *net);
>
> +int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb);
> +
>  void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
>  			   struct sk_buff_head *inputq);
>
> diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> index 66a65c2cdb23..9e9c61f7c999 100644
> --- a/net/tipc/name_table.c
> +++ b/net/tipc/name_table.c
> @@ -633,7 +633,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
>  	spin_unlock_bh(&tn->nametbl_lock);
>
>  	if (skb)
> -		tipc_node_broadcast(net, skb);
> +		tipc_bcast_named_publish(net, skb);
>  	return p;
>  }
>
> @@ -664,7 +664,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
>  	spin_unlock_bh(&tn->nametbl_lock);
>
>  	if (skb) {
> -		tipc_node_broadcast(net, skb);
> +		tipc_bcast_named_publish(net, skb);
>  		return 1;
>  	}
>  	return 0;
> --
> 2.20.1
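One possible shape of the backlog-queue idea Jon mentions, just to make the synchronization requirement concrete. All names below (bulk_done, namedq_deferred, tipc_named_apply, msg_is_bulk, msg_is_last_bulk) are invented for illustration; this is not a proposed patch:

/* Hypothetical sketch: defer broadcast name-table updates from a peer
 * until that peer's unicast bulk has been fully applied, instead of
 * silently ignoring withdraws with no matching publication.
 */
struct peer_state {
	bool bulk_done;			/* all bulk publications applied */
	struct sk_buff_head namedq_deferred;
};

static void named_rcv_from_peer(struct peer_state *n, struct sk_buff *skb)
{
	if (!n->bulk_done && !msg_is_bulk(buf_msg(skb))) {
		/* broadcast publish/withdraw arrived before the bulk
		 * finished: park it instead of dropping it
		 */
		__skb_queue_tail(&n->namedq_deferred, skb);
		return;
	}
	tipc_named_apply(skb);
	if (msg_is_last_bulk(buf_msg(skb))) {
		n->bulk_done = true;
		/* flush anything that arrived early, in order */
		while ((skb = __skb_dequeue(&n->namedq_deferred)))
			tipc_named_apply(skb);
	}
}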
From: Hoang Le <hoa...@de...> - 2019-11-13 07:35:45

Currently, updating the binding table (adding a service binding to the
name table/withdrawing a service binding) is sent over replicast.
However, if we are scaling up clusters to > 100 nodes/containers this
method is less efficient because of looping through the nodes in a
cluster one by one.

It is worth using broadcast to update a binding service. The binding
table then updates in all nodes in one shot.

The mechanism is backward compatible because only the sending side changes.

Signed-off-by: Hoang Le <hoa...@de...>
---
 net/tipc/bcast.c      | 13 +++++++++++++
 net/tipc/bcast.h      |  2 ++
 net/tipc/name_table.c |  4 ++--
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index f41096a759fa..18431fa897ab 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -843,3 +843,16 @@ void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
 		__skb_queue_tail(inputq, _skb);
 	}
 }
+
+int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb)
+{
+	struct sk_buff_head xmitq;
+	u16 cong_link_cnt;
+	int rc = 0;
+
+	__skb_queue_head_init(&xmitq);
+	__skb_queue_tail(&xmitq, skb);
+	rc = tipc_bcast_xmit(net, &xmitq, &cong_link_cnt);
+	__skb_queue_purge(&xmitq);
+	return rc;
+}
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index dadad953e2be..a100da3800fc 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -101,6 +101,8 @@ int tipc_bclink_reset_stats(struct net *net);
 u32 tipc_bcast_get_broadcast_mode(struct net *net);
 u32 tipc_bcast_get_broadcast_ratio(struct net *net);

+int tipc_bcast_named_publish(struct net *net, struct sk_buff *skb);
+
 void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq,
 			   struct sk_buff_head *inputq);

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 66a65c2cdb23..9e9c61f7c999 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -633,7 +633,7 @@ struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
 	spin_unlock_bh(&tn->nametbl_lock);

 	if (skb)
-		tipc_node_broadcast(net, skb);
+		tipc_bcast_named_publish(net, skb);
 	return p;
 }

@@ -664,7 +664,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower,
 	spin_unlock_bh(&tn->nametbl_lock);

 	if (skb) {
-		tipc_node_broadcast(net, skb);
+		tipc_bcast_named_publish(net, skb);
 		return 1;
 	}
 	return 0;
--
2.20.1
From: David M. <da...@da...> - 2019-11-13 03:46:11

From: Hoang Le <hoa...@de...>
Date: Tue, 12 Nov 2019 07:40:04 +0700

> In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address
> hash values"), the 32-bit node address is only generated after a one-second
> trial period has expired. However, the self's addr in struct tipc_monitor is
> not updated according to the node address generated. This leads to it always
> being zero as the initial value. As a result, the sorting algorithm using
> this value does not work as expected, and neither does the neighbor
> monitoring framework.
>
> In this commit, we add a fix to update self's addr when the 32-bit node
> address is generated.
>
> Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values")
> Acked-by: Jon Maloy <jon...@er...>
> Signed-off-by: Hoang Le <hoa...@de...>

Applied.
From: Hoang L. <hoa...@de...> - 2019-11-13 01:50:11

Yeah, but I think we will have the same result with both of them, because link monitoring is still 'idle' during this period of time.

Regards,
Hoang

-----Original Message-----
From: Jon Maloy <jon...@er...>
Sent: Wednesday, November 13, 2019 8:35 AM
To: Hoang Huu Le <hoa...@de...>; ma...@do...; tip...@li...
Subject: RE: [net-next] tipc: update mon's self addr when node addr generated

Thinking about it, wouldn't it be better to add the node to the monitor at the moment it really has an address, and not earlier? To add it to the monitor with address 0 is pretty meaningless.

///jon

> -----Original Message-----
> From: Jon Maloy
> Sent: 11-Nov-19 09:10
> To: Hoang Le <hoa...@de...>; ma...@do...; tip...@li...
> Subject: RE: [net-next] tipc: update mon's self addr when node addr generated
>
> Acked.
>
> ///jon
>
> > -----Original Message-----
> > From: Hoang Le <hoa...@de...>
> > Sent: 11-Nov-19 04:24
> > To: Jon Maloy <jon...@er...>; ma...@do...; tip...@li...
> > Subject: [net-next] tipc: update mon's self addr when node addr generated
> >
> > In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address
> > hash values"), the 32-bit node address is only generated after a one-second
> > trial period has expired. However, the self's addr in struct tipc_monitor is
> > not updated according to the node address generated. This leads to it always
> > being zero as the initial value. As a result, the sorting algorithm using
> > this value does not work as expected, and neither does the neighbor
> > monitoring framework.
> >
> > In this commit, we add a fix to update self's addr when the 32-bit node
> > address is generated.
> >
> > Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values")
> > Signed-off-by: Hoang Le <hoa...@de...>
> > ---
> >  net/tipc/monitor.c | 15 +++++++++++++++
> >  net/tipc/monitor.h |  1 +
> >  net/tipc/net.c     |  2 ++
> >  3 files changed, 18 insertions(+)
> >
> > diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
> > index 6a6eae88442f..58708b4c7719 100644
> > --- a/net/tipc/monitor.c
> > +++ b/net/tipc/monitor.c
> > @@ -665,6 +665,21 @@ void tipc_mon_delete(struct net *net, int bearer_id)
> >  	kfree(mon);
> >  }
> >
> > +void tipc_mon_reinit_self(struct net *net)
> > +{
> > +	struct tipc_monitor *mon;
> > +	int bearer_id;
> > +
> > +	for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
> > +		mon = tipc_monitor(net, bearer_id);
> > +		if (!mon)
> > +			continue;
> > +		write_lock_bh(&mon->lock);
> > +		mon->self->addr = tipc_own_addr(net);
> > +		write_unlock_bh(&mon->lock);
> > +	}
> > +}
> > +
> >  int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
> >  {
> >  	struct tipc_net *tn = tipc_net(net);
> > diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
> > index 2a21b93e0d04..ed63d2e650b0 100644
> > --- a/net/tipc/monitor.h
> > +++ b/net/tipc/monitor.h
> > @@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg,
> >  			  u32 bearer_id);
> >  int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
> >  			     u32 bearer_id, u32 *prev_node);
> > +void tipc_mon_reinit_self(struct net *net);
> >
> >  extern const int tipc_max_domain_size;
> >  #endif
> > diff --git a/net/tipc/net.c b/net/tipc/net.c
> > index 85707c185360..2de3cec9929d 100644
> > --- a/net/tipc/net.c
> > +++ b/net/tipc/net.c
> > @@ -42,6 +42,7 @@
> >  #include "node.h"
> >  #include "bcast.h"
> >  #include "netlink.h"
> > +#include "monitor.h"
> >
> >  /*
> >   * The TIPC locking policy is designed to ensure a very fine locking
> > @@ -136,6 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr)
> >  	tipc_set_node_addr(net, addr);
> >  	tipc_named_reinit(net);
> >  	tipc_sk_reinit(net);
> > +	tipc_mon_reinit_self(net);
> >  	tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
> >  			     TIPC_CLUSTER_SCOPE, 0, addr);
> >  }
> > --
> > 2.20.1
From: Jon M. <jon...@er...> - 2019-11-13 01:35:01

Thinking about it, wouldn't it be better to add the node to the monitor at the moment it really has an address, and not earlier? To add it to the monitor with address 0 is pretty meaningless.

///jon

> -----Original Message-----
> From: Jon Maloy
> Sent: 11-Nov-19 09:10
> To: Hoang Le <hoa...@de...>; ma...@do...; tip...@li...
> Subject: RE: [net-next] tipc: update mon's self addr when node addr generated
>
> Acked.
>
> ///jon
>
> > -----Original Message-----
> > From: Hoang Le <hoa...@de...>
> > Sent: 11-Nov-19 04:24
> > To: Jon Maloy <jon...@er...>; ma...@do...; tip...@li...
> > Subject: [net-next] tipc: update mon's self addr when node addr generated
> >
> > In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address
> > hash values"), the 32-bit node address is only generated after a one-second
> > trial period has expired. However, the self's addr in struct tipc_monitor is
> > not updated according to the node address generated. This leads to it always
> > being zero as the initial value. As a result, the sorting algorithm using
> > this value does not work as expected, and neither does the neighbor
> > monitoring framework.
> >
> > In this commit, we add a fix to update self's addr when the 32-bit node
> > address is generated.
> >
> > Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values")
> > Signed-off-by: Hoang Le <hoa...@de...>
> > ---
> >  net/tipc/monitor.c | 15 +++++++++++++++
> >  net/tipc/monitor.h |  1 +
> >  net/tipc/net.c     |  2 ++
> >  3 files changed, 18 insertions(+)
> >
> > diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
> > index 6a6eae88442f..58708b4c7719 100644
> > --- a/net/tipc/monitor.c
> > +++ b/net/tipc/monitor.c
> > @@ -665,6 +665,21 @@ void tipc_mon_delete(struct net *net, int bearer_id)
> >  	kfree(mon);
> >  }
> >
> > +void tipc_mon_reinit_self(struct net *net)
> > +{
> > +	struct tipc_monitor *mon;
> > +	int bearer_id;
> > +
> > +	for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
> > +		mon = tipc_monitor(net, bearer_id);
> > +		if (!mon)
> > +			continue;
> > +		write_lock_bh(&mon->lock);
> > +		mon->self->addr = tipc_own_addr(net);
> > +		write_unlock_bh(&mon->lock);
> > +	}
> > +}
> > +
> >  int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
> >  {
> >  	struct tipc_net *tn = tipc_net(net);
> > diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
> > index 2a21b93e0d04..ed63d2e650b0 100644
> > --- a/net/tipc/monitor.h
> > +++ b/net/tipc/monitor.h
> > @@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg,
> >  			  u32 bearer_id);
> >  int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
> >  			     u32 bearer_id, u32 *prev_node);
> > +void tipc_mon_reinit_self(struct net *net);
> >
> >  extern const int tipc_max_domain_size;
> >  #endif
> > diff --git a/net/tipc/net.c b/net/tipc/net.c
> > index 85707c185360..2de3cec9929d 100644
> > --- a/net/tipc/net.c
> > +++ b/net/tipc/net.c
> > @@ -42,6 +42,7 @@
> >  #include "node.h"
> >  #include "bcast.h"
> >  #include "netlink.h"
> > +#include "monitor.h"
> >
> >  /*
> >   * The TIPC locking policy is designed to ensure a very fine locking
> > @@ -136,6 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr)
> >  	tipc_set_node_addr(net, addr);
> >  	tipc_named_reinit(net);
> >  	tipc_sk_reinit(net);
> > +	tipc_mon_reinit_self(net);
> >  	tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
> >  			     TIPC_CLUSTER_SCOPE, 0, addr);
> >  }
> > --
> > 2.20.1
From: Hoang Le <hoa...@de...> - 2019-11-12 00:40:26

In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address
hash values"), the 32-bit node address is only generated after a one-second
trial period has expired. However, the self's addr in struct tipc_monitor is
not updated according to the node address generated. This leads to it always
being zero as the initial value. As a result, the sorting algorithm using
this value does not work as expected, and neither does the neighbor
monitoring framework.

In this commit, we add a fix to update self's addr when the 32-bit node
address is generated.

Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values")
Acked-by: Jon Maloy <jon...@er...>
Signed-off-by: Hoang Le <hoa...@de...>
---
 net/tipc/monitor.c | 15 +++++++++++++++
 net/tipc/monitor.h |  1 +
 net/tipc/net.c     |  2 ++
 3 files changed, 18 insertions(+)

diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 6a6eae88442f..58708b4c7719 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -665,6 +665,21 @@ void tipc_mon_delete(struct net *net, int bearer_id)
 	kfree(mon);
 }

+void tipc_mon_reinit_self(struct net *net)
+{
+	struct tipc_monitor *mon;
+	int bearer_id;
+
+	for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+		mon = tipc_monitor(net, bearer_id);
+		if (!mon)
+			continue;
+		write_lock_bh(&mon->lock);
+		mon->self->addr = tipc_own_addr(net);
+		write_unlock_bh(&mon->lock);
+	}
+}
+
 int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
 {
 	struct tipc_net *tn = tipc_net(net);
diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
index 2a21b93e0d04..ed63d2e650b0 100644
--- a/net/tipc/monitor.h
+++ b/net/tipc/monitor.h
@@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg,
 			  u32 bearer_id);
 int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
 			     u32 bearer_id, u32 *prev_node);
+void tipc_mon_reinit_self(struct net *net);

 extern const int tipc_max_domain_size;
 #endif
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 85707c185360..2de3cec9929d 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -42,6 +42,7 @@
 #include "node.h"
 #include "bcast.h"
 #include "netlink.h"
+#include "monitor.h"

 /*
  * The TIPC locking policy is designed to ensure a very fine locking
@@ -136,6 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr)
 	tipc_set_node_addr(net, addr);
 	tipc_named_reinit(net);
 	tipc_sk_reinit(net);
+	tipc_mon_reinit_self(net);
 	tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
 			     TIPC_CLUSTER_SCOPE, 0, addr);
 }
--
2.20.1
From: Jon M. <jon...@er...> - 2019-11-11 14:26:29

Acked.

///jon

> -----Original Message-----
> From: Hoang Le <hoa...@de...>
> Sent: 11-Nov-19 04:24
> To: Jon Maloy <jon...@er...>; ma...@do...; tip...@li...
> Subject: [net-next] tipc: update mon's self addr when node addr generated
>
> In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address
> hash values"), the 32-bit node address is only generated after a one-second
> trial period has expired. However, the self's addr in struct tipc_monitor is
> not updated according to the node address generated. This leads to it always
> being zero as the initial value. As a result, the sorting algorithm using
> this value does not work as expected, and neither does the neighbor
> monitoring framework.
>
> In this commit, we add a fix to update self's addr when the 32-bit node
> address is generated.
>
> Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values")
> Signed-off-by: Hoang Le <hoa...@de...>
> ---
>  net/tipc/monitor.c | 15 +++++++++++++++
>  net/tipc/monitor.h |  1 +
>  net/tipc/net.c     |  2 ++
>  3 files changed, 18 insertions(+)
>
> diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
> index 6a6eae88442f..58708b4c7719 100644
> --- a/net/tipc/monitor.c
> +++ b/net/tipc/monitor.c
> @@ -665,6 +665,21 @@ void tipc_mon_delete(struct net *net, int bearer_id)
>  	kfree(mon);
>  }
>
> +void tipc_mon_reinit_self(struct net *net)
> +{
> +	struct tipc_monitor *mon;
> +	int bearer_id;
> +
> +	for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
> +		mon = tipc_monitor(net, bearer_id);
> +		if (!mon)
> +			continue;
> +		write_lock_bh(&mon->lock);
> +		mon->self->addr = tipc_own_addr(net);
> +		write_unlock_bh(&mon->lock);
> +	}
> +}
> +
>  int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
>  {
>  	struct tipc_net *tn = tipc_net(net);
> diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
> index 2a21b93e0d04..ed63d2e650b0 100644
> --- a/net/tipc/monitor.h
> +++ b/net/tipc/monitor.h
> @@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg,
>  			  u32 bearer_id);
>  int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
>  			     u32 bearer_id, u32 *prev_node);
> +void tipc_mon_reinit_self(struct net *net);
>
>  extern const int tipc_max_domain_size;
>  #endif
> diff --git a/net/tipc/net.c b/net/tipc/net.c
> index 85707c185360..2de3cec9929d 100644
> --- a/net/tipc/net.c
> +++ b/net/tipc/net.c
> @@ -42,6 +42,7 @@
>  #include "node.h"
>  #include "bcast.h"
>  #include "netlink.h"
> +#include "monitor.h"
>
>  /*
>   * The TIPC locking policy is designed to ensure a very fine locking
> @@ -136,6 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr)
>  	tipc_set_node_addr(net, addr);
>  	tipc_named_reinit(net);
>  	tipc_sk_reinit(net);
> +	tipc_mon_reinit_self(net);
>  	tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
>  			     TIPC_CLUSTER_SCOPE, 0, addr);
>  }
> --
> 2.20.1
From: Hoang Le <hoa...@de...> - 2019-11-11 09:24:03

In commit 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address
hash values"), the 32-bit node address is only generated after a one-second
trial period has expired. However, the self's addr in struct tipc_monitor is
not updated according to the node address generated. This leads to it always
being zero as the initial value. As a result, the sorting algorithm using
this value does not work as expected, and neither does the neighbor
monitoring framework.

In this commit, we add a fix to update self's addr when the 32-bit node
address is generated.

Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values")
Signed-off-by: Hoang Le <hoa...@de...>
---
 net/tipc/monitor.c | 15 +++++++++++++++
 net/tipc/monitor.h |  1 +
 net/tipc/net.c     |  2 ++
 3 files changed, 18 insertions(+)

diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 6a6eae88442f..58708b4c7719 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -665,6 +665,21 @@ void tipc_mon_delete(struct net *net, int bearer_id)
 	kfree(mon);
 }

+void tipc_mon_reinit_self(struct net *net)
+{
+	struct tipc_monitor *mon;
+	int bearer_id;
+
+	for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+		mon = tipc_monitor(net, bearer_id);
+		if (!mon)
+			continue;
+		write_lock_bh(&mon->lock);
+		mon->self->addr = tipc_own_addr(net);
+		write_unlock_bh(&mon->lock);
+	}
+}
+
 int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size)
 {
 	struct tipc_net *tn = tipc_net(net);
diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h
index 2a21b93e0d04..ed63d2e650b0 100644
--- a/net/tipc/monitor.h
+++ b/net/tipc/monitor.h
@@ -77,6 +77,7 @@ int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg,
 			  u32 bearer_id);
 int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg,
 			     u32 bearer_id, u32 *prev_node);
+void tipc_mon_reinit_self(struct net *net);

 extern const int tipc_max_domain_size;
 #endif
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 85707c185360..2de3cec9929d 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -42,6 +42,7 @@
 #include "node.h"
 #include "bcast.h"
 #include "netlink.h"
+#include "monitor.h"

 /*
  * The TIPC locking policy is designed to ensure a very fine locking
@@ -136,6 +137,7 @@ static void tipc_net_finalize(struct net *net, u32 addr)
 	tipc_set_node_addr(net, addr);
 	tipc_named_reinit(net);
 	tipc_sk_reinit(net);
+	tipc_mon_reinit_self(net);
 	tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
 			     TIPC_CLUSTER_SCOPE, 0, addr);
 }
--
2.20.1
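To make the timing issue in the commit message concrete, the sequence is roughly the following; the call sites are simplified for illustration:

/* Illustrative timeline (simplified call sites):
 *
 *   tipc_mon_create(net, bearer_id)
 *       -> self->addr = tipc_own_addr(net) == 0   (trial period, no address yet)
 *
 *   ... ~1 second address trial period elapses ...
 *
 *   tipc_net_finalize(net, addr)                  (32-bit address now generated)
 *       -> tipc_set_node_addr(net, addr)
 *       -> tipc_mon_reinit_self(net)              (the fix: self->addr updated)
 */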
From: David M. <da...@da...> - 2019-11-08 22:02:48

From: Tuong Lien <tuo...@de...>
Date: Fri, 8 Nov 2019 12:05:07 +0700

> This series provides the TIPC encryption feature, kernel part. There will be
> another one in 'iproute2/tipc' for user space to set keys.
>
> v2: add select crypto 'aes(gcm)' for TIPC_CRYPTO in Kconfig

Series applied, thanks.
From: Jon M. <jon...@er...> - 2019-11-08 16:26:55

> -----Original Message-----
> From: Tuong Lien <tuo...@de...>
> Sent: 6-Nov-19 05:39
> To: tip...@li...; Jon Maloy <jon...@er...>; ma...@do...;
> yin...@wi...
> Subject: [PATCH RFC] tipc: fix name table rbtree issues
>
> The current rbtree for service ranges in the name table is built based
> on the 'lower' & 'upper' range values, resulting in a flaw in the rbtree
> searching. Some issues have been observed in case of range overlapping:
>
> Case #1: unable to withdraw a name entry:
> After some name services are bound, all of them are withdrawn by user
> but one remains in the name table forever. This corrupts the table and
> that service becomes dummy i.e. no real port.
> E.g.
>
>                  /
>             {22, 22}
>                /
>               /
>      ---> {10, 50}
>           /      \
>          /        \
>     {10, 30}   {20, 60}
>
> The node {10, 30} cannot be removed since the rbtree searching stops at
> the node's ancestor i.e. {10, 50}, so starting from it will never reach
> the finding node.
>
> Case #2: failed to send data in some cases:
> E.g. Two service ranges: {20, 60}, {10, 50} are bound. The rbtree for
> this service will be one of the two cases below depending on the order
> of the bindings:
>
>      {20, 60}                 {10, 50} <--
>      /      \                 /      \
>     /        \               /        \
>  {10, 50}    NIL <--       NIL      {20, 60}
>
>        (a)                      (b)
>
> Now, try to send some data to service {30}, there will be two results:
> (a): Failed, no route to host.
> (b): Ok.
>
> The reason is that the rbtree searching will stop at the pointing node
> as shown above.
>
> Case #3: no round-robin in data sending:
> Same as case #2b above, the data sending to service {30} will always
> arrive in the {10, 50}.

I wouldn't mention case #3 at all. If we could make this work (which I doubt, see my previous mail) it would in reality imply new functionality. The old functionality, round-robin between identical ranges, does still work, as far as I can understand.

>
> Case #4: failed to send data:
> Same as case #2b above but if the data sending's scope is local and the
> {10, 50} is published by a peer node, then it will result in "no route
> to host" even though the other {20, 60} is for example on the local
> node which should be able to get the data.
>
> The issues are actually due to the way we built the rbtree. This commit
> fixes it by introducing an additional field to each node, named 'max',
> which is the largest 'upper' of that node subtree. The 'max' value for
> each subtree will be propagated correctly whenever a node is inserted/
> removed or the tree is rebalanced by the augmented rbtree callbacks.
>
> By this way, we can change the rbtree searching approach to solve the
> issues above. Case #3 is however not covered by this commit, we leave
> it as current until one is proven to need a round-robin fashion for it.
>
> Besides, since now we have the 'max' value, we can even improve the
> searching for a next range matching e.g. in case of multicast, so get
> rid of the unneeded looping over all the nodes in the tree.
>
> Signed-off-by: Tuong Lien <tuo...@de...>
> ---
>  net/tipc/name_table.c | 268 +++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 179 insertions(+), 89 deletions(-)
>
> diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
> index 66a65c2cdb23..5cac6c1dfeb0 100644
> --- a/net/tipc/name_table.c
> +++ b/net/tipc/name_table.c
> @@ -35,6 +35,7 @@
>   */
>
>  #include <net/sock.h>
> +#include <linux/rbtree_augmented.h>
>  #include "core.h"
>  #include "netlink.h"
>  #include "name_table.h"
> @@ -50,6 +51,7 @@
>   * @lower: service range lower bound
>   * @upper: service range upper bound
>   * @tree_node: member of service range RB tree
> + * @max: largest 'upper' in this node subtree
>   * @local_publ: list of identical publications made from this node
>   *              Used by closest_first lookup and multicast lookup algorithm
>   * @all_publ: all publications identical to this one, whatever node and scope
> @@ -59,6 +61,7 @@ struct service_range {
>  	u32 lower;
>  	u32 upper;
>  	struct rb_node tree_node;
> +	u32 max;
>  	struct list_head local_publ;
>  	struct list_head all_publ;
>  };
> @@ -81,6 +84,130 @@ struct tipc_service {
>  	struct rcu_head rcu;
>  };
>
> +#define service_range_upper(sr) ((sr)->upper)
> +RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks,
> +			 struct service_range, tree_node, u32, max,
> +			 service_range_upper)
> +
> +#define service_range_entry(rbtree_node) \
> +	(container_of(rbtree_node, struct service_range, tree_node))
> +
> +#define service_range_overlap(sr, start, end) \
> +	((sr)->lower <= (end) && (sr)->upper >= (start))
> +
> +/**
> + * service_range_foreach_match - iterate over tipc service rbtree for each
> + *                               range match
> + * @sr: the service range pointer as a loop cursor
> + * @sc: the pointer to tipc service which holds the service range rbtree
> + * @start, end: the range (end >= start) for matching
> + */
> +#define service_range_foreach_match(sr, sc, start, end)			\
> +	for (sr = service_range_match_first((sc)->ranges.rb_node,	\
> +					    (start),			\
> +					    (end));			\
> +	     sr;							\
> +	     sr = service_range_match_next(&(sr)->tree_node,		\
> +					   (start),			\
> +					   (end)))
> +
> +/**
> + * service_range_match_first - find first service range matching a range
> + * @n: the root node of service range rbtree for searching
> + * @start, end: the range (end >= start) for matching
> + *
> + * Return: the leftmost service range node in the rbtree that overlaps the
> + * specific range if any. Otherwise, returns NULL.
> + */
> +static struct service_range *service_range_match_first(struct rb_node *n,
> +							u32 start, u32 end)
> +{
> +	struct service_range *sr;
> +	struct rb_node *l, *r;
> +
> +	/* Non overlaps in tree at all? */
> +	if (!n || service_range_entry(n)->max < start)
> +		return NULL;
> +
> +	while (n) {
> +		l = n->rb_left;
> +		if (l && service_range_entry(l)->max >= start) {
> +			/* A leftmost overlap range node must be one in the left
> +			 * subtree. If not, it has lower > end, then nodes on
> +			 * the right side cannot satisfy the condition either.
> +			 */
> +			n = l;
> +			continue;
> +		}
> +
> +		/* No one in the left subtree can match, return if this node is
> +		 * an overlap i.e. leftmost.
> +		 */
> +		sr = service_range_entry(n);
> +		if (service_range_overlap(sr, start, end))
> +			return sr;
> +
> +		/* Ok, try to lookup on the right side */
> +		r = n->rb_right;
> +		if (sr->lower <= end &&
> +		    r && service_range_entry(r)->max >= start) {
> +			n = r;
> +			continue;
> +		}
> +		break;
> +	}
> +
> +	return NULL;
> +}
> +
> +/**
> + * service_range_match_next - find next service range matching a range
> + * @n: a node in service range rbtree from which the searching starts
> + * @start, end: the range (end >= start) for matching
> + *
> + * Return: the next service range node to the given node in the rbtree that
> + * overlaps the specific range if any. Otherwise, returns NULL.
> + */
> +static struct service_range *service_range_match_next(struct rb_node *n,
> +						       u32 start, u32 end)
> +{
> +	struct service_range *sr;
> +	struct rb_node *p, *r;
> +
> +	while (n) {
> +		r = n->rb_right;
> +		if (r && service_range_entry(r)->max >= start)
> +			/* A next overlap range node must be one in the right
> +			 * subtree. If not, it has lower > end, then any next
> +			 * successor (- an ancestor) of this node cannot
> +			 * satisfy the condition either.
> +			 */
> +			return service_range_match_first(r, start, end);
> +
> +		/* No one in the right subtree can match, go up to find an
> +		 * ancestor of this node which is parent of a left-hand child.
> +		 */
> +		while ((p = rb_parent(n)) && n == p->rb_right)
> +			n = p;
> +		if (!p)
> +			break;
> +
> +		/* Return if this ancestor is an overlap */
> +		sr = service_range_entry(p);
> +		if (service_range_overlap(sr, start, end))
> +			return sr;
> +
> +		/* Ok, try to lookup more from this ancestor */
> +		if (sr->lower <= end) {
> +			n = p;
> +			continue;
> +		}
> +		break;
> +	}
> +
> +	return NULL;
> +}
> +
>  static int hash(int x)
>  {
>  	return x & (TIPC_NAMETBL_SIZE - 1);
> @@ -143,19 +270,8 @@ static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
>  static struct service_range *tipc_service_first_range(struct tipc_service *sc,
>  						      u32 instance)
>  {
> -	struct rb_node *n = sc->ranges.rb_node;
> -	struct service_range *sr;
> -
> -	while (n) {
> -		sr = container_of(n, struct service_range, tree_node);
> -		if (sr->lower > instance)
> -			n = n->rb_left;
> -		else if (sr->upper < instance)
> -			n = n->rb_right;
> -		else
> -			return sr;
> -	}
> -	return NULL;
> +	return service_range_match_first(sc->ranges.rb_node, instance,
> +					 instance);
>  }

This function looks redundant now. It is called from only one location, and could just as well be replaced with a direct call to service_range_match_first().

Otherwise this looks good.

Acked-by: jon

>
>  /* tipc_service_find_range - find service range matching publication parameters
> @@ -163,56 +279,46 @@ static struct service_range *tipc_service_first_range(struct tipc_service *sc,
>  static struct service_range *tipc_service_find_range(struct tipc_service *sc,
>  						     u32 lower, u32 upper)
>  {
> -	struct rb_node *n = sc->ranges.rb_node;
>  	struct service_range *sr;
>
> -	sr = tipc_service_first_range(sc, lower);
> -	if (!sr)
> -		return NULL;
> -
> -	/* Look for exact match */
> -	for (n = &sr->tree_node; n; n = rb_next(n)) {
> -		sr = container_of(n, struct service_range, tree_node);
> -		if (sr->upper == upper)
> -			break;
> +	service_range_foreach_match(sr, sc, lower, upper) {
> +		/* Look for exact match */
> +		if (sr->lower == lower && sr->upper == upper)
> +			return sr;
>  	}
> -	if (!n || sr->lower != lower || sr->upper != upper)
> -		return NULL;
>
> -	return sr;
> +	return NULL;
>  }
>
>  static struct service_range *tipc_service_create_range(struct tipc_service *sc,
>  							u32 lower, u32 upper)
>  {
>  	struct rb_node **n, *parent = NULL;
> -	struct service_range *sr, *tmp;
> +	struct service_range *sr;
>
>  	n = &sc->ranges.rb_node;
>  	while (*n) {
> -		tmp = container_of(*n, struct service_range, tree_node);
>  		parent = *n;
> -		tmp = container_of(parent, struct service_range, tree_node);
> -		if (lower < tmp->lower)
> -			n = &(*n)->rb_left;
> -		else if (lower > tmp->lower)
> -			n = &(*n)->rb_right;
> -		else if (upper < tmp->upper)
> -			n = &(*n)->rb_left;
> -		else if (upper > tmp->upper)
> -			n = &(*n)->rb_right;
> +		sr = service_range_entry(parent);
> +		if (lower == sr->lower && upper == sr->upper)
> +			return sr;
> +		if (sr->max < upper)
> +			sr->max = upper;
> +		if (lower <= sr->lower)
> +			n = &parent->rb_left;
>  		else
> -			return tmp;
> +			n = &parent->rb_right;
>  	}
>  	sr = kzalloc(sizeof(*sr), GFP_ATOMIC);
>  	if (!sr)
>  		return NULL;
>  	sr->lower = lower;
>  	sr->upper = upper;
> +	sr->max = upper;
>  	INIT_LIST_HEAD(&sr->local_publ);
>  	INIT_LIST_HEAD(&sr->all_publ);
>  	rb_link_node(&sr->tree_node, parent, n);
> -	rb_insert_color(&sr->tree_node, &sc->ranges);
> +	rb_insert_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
>  	return sr;
>  }
>
> @@ -289,7 +395,6 @@ static void tipc_service_subscribe(struct tipc_service *service,
>  	struct service_range *sr;
>  	struct tipc_name_seq ns;
>  	struct publication *p;
> -	struct rb_node *n;
>  	bool first;
>
>  	ns.type = tipc_sub_read(sb, seq.type);
> @@ -302,14 +407,8 @@ static void tipc_service_subscribe(struct tipc_service *service,
>  	if (tipc_sub_read(sb, filter) & TIPC_SUB_NO_STATUS)
>  		return;
>
> -	for (n = rb_first(&service->ranges); n; n = rb_next(n)) {
> -		sr = container_of(n, struct service_range, tree_node);
> -		if (sr->lower > ns.upper)
> -			break;
> -		if (!tipc_sub_check_overlap(&ns, sr->lower, sr->upper))
> -			continue;
> +	service_range_foreach_match(sr, service, ns.lower, ns.upper) {
>  		first = true;
> -
>  		list_for_each_entry(p, &sr->all_publ, all_publ) {
>  			tipc_sub_report_overlap(sub, sr->lower, sr->upper,
>  						TIPC_PUBLISHED, p->port,
> @@ -390,7 +489,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
>
>  	/* Remove service range item if this was its last publication */
>  	if (list_empty(&sr->all_publ)) {
> -		rb_erase(&sr->tree_node, &sc->ranges);
> +		rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
>  		kfree(sr);
>  	}
>
> @@ -438,34 +537,39 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
>  	rcu_read_lock();
>  	sc = tipc_service_find(net, type);
>  	if (unlikely(!sc))
> -		goto not_found;
> +		goto exit;
>
>  	spin_lock_bh(&sc->lock);
> -	sr = tipc_service_first_range(sc, instance);
> -	if (unlikely(!sr))
> -		goto no_match;
> -
> -	/* Select lookup algorithm: local, closest-first or round-robin */
> -	if (*dnode == self) {
> -		list = &sr->local_publ;
> -		if (list_empty(list))
> -			goto no_match;
> -		p = list_first_entry(list, struct publication, local_publ);
> -		list_move_tail(&p->local_publ, &sr->local_publ);
> -	} else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
> -		list = &sr->local_publ;
> -		p = list_first_entry(list, struct publication, local_publ);
> -		list_move_tail(&p->local_publ, &sr->local_publ);
> -	} else {
> -		list = &sr->all_publ;
> -		p = list_first_entry(list, struct publication, all_publ);
> -		list_move_tail(&p->all_publ, &sr->all_publ);
> +	service_range_foreach_match(sr, sc, instance, instance) {
> +		/* Select lookup algo: local, closest-first or round-robin */
> +		if (*dnode == self) {
> +			list = &sr->local_publ;
> +			if (list_empty(list))
> +				continue;
> +			p = list_first_entry(list, struct publication,
> +					     local_publ);
> +			list_move_tail(&p->local_publ, &sr->local_publ);
> +		} else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
> +			list = &sr->local_publ;
> +			p = list_first_entry(list, struct publication,
> +					     local_publ);
> +			list_move_tail(&p->local_publ, &sr->local_publ);
> +		} else {
> +			list = &sr->all_publ;
> +			p = list_first_entry(list, struct publication,
> +					     all_publ);
> +			list_move_tail(&p->all_publ, &sr->all_publ);
> +		}
> +		port = p->port;
> +		node = p->node;
> +		/* As for legacy, pick the first matching range only, a "true"
> +		 * round-robin will be performed as needed.
> +		 */
> +		break;
>  	}
> -	port = p->port;
> -	node = p->node;
> -no_match:
>  	spin_unlock_bh(&sc->lock);
> -not_found:
> +
> +exit:
>  	rcu_read_unlock();
>  	*dnode = node;
>  	return port;
> @@ -517,7 +621,6 @@ void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
>  	struct service_range *sr;
>  	struct tipc_service *sc;
>  	struct publication *p;
> -	struct rb_node *n;
>
>  	rcu_read_lock();
>  	sc = tipc_service_find(net, type);
> @@ -525,13 +628,7 @@ void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
>  		goto exit;
>
>  	spin_lock_bh(&sc->lock);
> -
> -	for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
> -		sr = container_of(n, struct service_range, tree_node);
> -		if (sr->upper < lower)
> -			continue;
> -		if (sr->lower > upper)
> -			break;
> +	service_range_foreach_match(sr, sc, lower, upper) {
>  		list_for_each_entry(p, &sr->local_publ, local_publ) {
>  			if (p->scope == scope || (!exact && p->scope < scope))
>  				tipc_dest_push(dports, 0, p->port);
> @@ -552,7 +649,6 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
>  	struct service_range *sr;
>  	struct tipc_service *sc;
>  	struct publication *p;
> -	struct rb_node *n;
>
>  	rcu_read_lock();
>  	sc = tipc_service_find(net, type);
> @@ -560,13 +656,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
>  		goto exit;
>
>  	spin_lock_bh(&sc->lock);
> -
> -	for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
> -		sr = container_of(n, struct service_range, tree_node);
> -		if (sr->upper < lower)
> -			continue;
> -		if (sr->lower > upper)
> -			break;
> +	service_range_foreach_match(sr, sc, lower, upper) {
>  		list_for_each_entry(p, &sr->all_publ, all_publ) {
>  			tipc_nlist_add(nodes, p->node);
>  		}
> @@ -764,7 +854,7 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc)
>  			tipc_service_remove_publ(sr, p->node, p->key);
>  			kfree_rcu(p, rcu);
>  		}
> -		rb_erase(&sr->tree_node, &sc->ranges);
> +		rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
>  		kfree(sr);
>  	}
>  	hlist_del_init_rcu(&sc->service_list);
> --
> 2.13.7
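For readers not familiar with augmented rbtrees, the reason the added 'max' field fixes the lookups can be seen in this generic, self-contained reduction of the search in the patch (plain pointers instead of struct rb_node, so it runs in user space):

#include <stdio.h>

/* Each node stores a range keyed on 'lower' and caches 'max', the
 * largest 'upper' anywhere in its subtree. A whole subtree whose
 * max < start can be skipped - that is the pruning the patch adds.
 */
struct node {
	unsigned int lower, upper, max;
	struct node *l, *r;
};

static struct node *first_overlap(struct node *n, unsigned int start,
				  unsigned int end)
{
	while (n) {
		if (n->l && n->l->max >= start) {
			n = n->l;	/* leftmost hit must be down left */
			continue;
		}
		if (n->lower <= end && n->upper >= start)
			return n;	/* this node itself overlaps */
		if (n->lower <= end && n->r && n->r->max >= start) {
			n = n->r;	/* otherwise try the right subtree */
			continue;
		}
		break;
	}
	return NULL;
}

int main(void)
{
	/* Case #2a from the commit message: {20,60} root, {10,50} left */
	struct node a = { 10, 50, 50, NULL, NULL };
	struct node root = { 20, 60, 60, &a, NULL };
	struct node *hit;

	root.max = 60;			/* max(60, a.max) */
	hit = first_overlap(&root, 30, 30);
	printf("lookup 30 -> {%u,%u}\n", hit->lower, hit->upper);
	/* prints: lookup 30 -> {10,50}, the leftmost overlapping range */
	return 0;
}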
From: Tuong L. T. <tuo...@de...> - 2019-11-08 10:12:27

Hi Jon,

I just had a look into the patch, a few concerns:

1) Do we apply the algorithm to the broadcast sender link as well? If so, I guess we will have to decrease the link window at the bc_retrans()...?

2) Why don't we try to increase the link window just prior to the backlog advancing (its criteria might need to change a bit...)? Then, we can just go ahead at the link_xmit() (i.e. no worry about a gap between the two queues...), and even xmit more packets somehow (i.e. no need to put them into the backlogq and wait for next chances...)?

BR/Tuong

-----Original Message-----
From: Jon Maloy <jon...@er...>
Sent: Tuesday, November 5, 2019 1:39 AM
To: Jon Maloy <jon...@er...>; Jon Maloy <ma...@do...>
Cc: moh...@er...; par...@gm...; tun...@de...; hoa...@de...; tuo...@de...; gor...@de...; yin...@wi...; tip...@li...
Subject: [net-next 1/3] tipc: introduce variable window congestion control

We introduce a simple variable window congestion control for links. The algorithm is inspired by the Reno algorithm, and can best be described as working in permanent "congestion avoidance" mode, within strict limits.

- We introduce hard lower and upper window limits per link, still different and configurable per bearer type.

- Next, we let a link start at the minimum window, and then slowly increment it for each 32 received non-duplicate ACKs. This goes on until it either reaches the upper limit, or until it receives a NACK message.

- For each non-duplicate NACK received, we let the window decrease by intervals of 1/2 of the current window, but not below the minimum window.

The change does in reality have effect only on unicast ethernet transport, as we have seen that there is no room whatsoever for increasing the window size for the UDP bearer. This algorithm seems to give a ~25% throughput improvement for large messages, while it has no effect on small message throughput.

Suggested-by: Xin Long <luc...@gm...>
Signed-off-by: Jon Maloy <jon...@er...>
---
 net/tipc/bcast.c     | 11 +++++----
 net/tipc/bearer.c    | 11 +++++----
 net/tipc/bearer.h    |  6 +++--
 net/tipc/eth_media.c |  6 ++++-
 net/tipc/ib_media.c  |  5 +++-
 net/tipc/link.c      | 70 ++++++++++++++++++++++++++++++++++------------------
 net/tipc/link.h      |  9 ++++---
 net/tipc/node.c      | 13 ++++++----
 net/tipc/udp_media.c |  3 ++-
 9 files changed, 86 insertions(+), 48 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 6ef1abd..12fde9a 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -562,18 +562,18 @@ int tipc_bclink_reset_stats(struct net *net)
 	return 0;
 }

-static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit)
+static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win)
 {
 	struct tipc_link *l = tipc_bc_sndlink(net);

 	if (!l)
 		return -ENOPROTOOPT;
-	if (limit < BCLINK_WIN_MIN)
-		limit = BCLINK_WIN_MIN;
-	if (limit > TIPC_MAX_LINK_WIN)
+	if (max_win < BCLINK_WIN_MIN)
+		max_win = BCLINK_WIN_MIN;
+	if (max_win > TIPC_MAX_LINK_WIN)
 		return -EINVAL;
 	tipc_bcast_lock(net);
-	tipc_link_set_queue_limits(l, limit);
+	tipc_link_set_queue_limits(l, BCLINK_WIN_MIN, max_win);
 	tipc_bcast_unlock(net);
 	return 0;
 }
@@ -683,6 +683,7 @@ int tipc_bcast_init(struct net *net)
 	if (!tipc_link_bc_create(net, 0, 0,
 				 FB_MTU,
 				 BCLINK_WIN_DEFAULT,
+				 BCLINK_WIN_DEFAULT,
 				 0,
 				 &bb->inputq,
 				 NULL,
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 0214aa1..f994961 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -310,7 +310,8 @@ static int tipc_enable_bearer(struct net *net, const char *name,

 	b->identity = bearer_id;
 	b->tolerance = m->tolerance;
-	b->window = m->window;
+	b->min_win = m->min_win;
+	b->max_win = m->max_win;
 	b->domain = disc_domain;
 	b->net_plane = bearer_id + 'A';
 	b->priority = prio;
@@ -765,7 +766,7 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
 		goto prop_msg_full;
 	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance))
 		goto prop_msg_full;
-	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window))
+	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->max_win))
 		goto prop_msg_full;
 	if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP)
 		if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu))
@@ -1057,7 +1058,7 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
 		if (props[TIPC_NLA_PROP_PRIO])
 			b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
 		if (props[TIPC_NLA_PROP_WIN])
-			b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+			b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
 		if (props[TIPC_NLA_PROP_MTU]) {
 			if (b->media->type_id != TIPC_MEDIA_TYPE_UDP)
 				return -EINVAL;
@@ -1111,7 +1112,7 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
 		goto prop_msg_full;
 	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance))
 		goto prop_msg_full;
-	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window))
+	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->max_win))
 		goto prop_msg_full;
 	if (media->type_id == TIPC_MEDIA_TYPE_UDP)
 		if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu))
@@ -1244,7 +1245,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
 		if (props[TIPC_NLA_PROP_PRIO])
 			m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
 		if (props[TIPC_NLA_PROP_WIN])
-			m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+			m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
 		if (props[TIPC_NLA_PROP_MTU]) {
 			if (m->type_id != TIPC_MEDIA_TYPE_UDP)
 				return -EINVAL;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index ea0f3c4..58a23b9 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -119,7 +119,8 @@ struct tipc_media {
 			char *raw);
 	u32 priority;
 	u32 tolerance;
-	u32 window;
+	u32 min_win;
+	u32 max_win;
 	u32 mtu;
 	u32 type_id;
 	u32 hwaddr_len;
@@ -158,7 +159,8 @@ struct tipc_bearer {
 	struct packet_type pt;
 	struct rcu_head rcu;
 	u32 priority;
-	u32 window;
+	u32 min_win;
+	u32 max_win;
 	u32 tolerance;
 	u32 domain;
 	u32 identity;
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index f69a2fd..38cdcab 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -37,6 +37,9 @@
 #include "core.h"
 #include "bearer.h"

+#define TIPC_MIN_ETH_LINK_WIN 50
+#define TIPC_MAX_ETH_LINK_WIN 500
+
 /* Convert Ethernet address (media address format) to string */
 static int tipc_eth_addr2str(struct tipc_media_addr *addr,
 			     char *strbuf, int bufsz)
@@ -92,7 +95,8 @@ struct tipc_media eth_media_info = {
 	.raw2addr	= tipc_eth_raw2addr,
 	.priority	= TIPC_DEF_LINK_PRI,
 	.tolerance	= TIPC_DEF_LINK_TOL,
-	.window		= TIPC_DEF_LINK_WIN,
+	.min_win	= TIPC_MIN_ETH_LINK_WIN,
+	.max_win	= TIPC_MAX_ETH_LINK_WIN,
 	.type_id	= TIPC_MEDIA_TYPE_ETH,
 	.hwaddr_len	= ETH_ALEN,
 	.name		= "eth"
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index e8c1671..7aa9ff8 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c
@@ -42,6 +42,8 @@
 #include "core.h"
 #include "bearer.h"

+#define TIPC_MAX_IB_LINK_WIN 500
+
 /* convert InfiniBand address (media address format) media address to string */
 static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf,
 			    int str_size)
@@ -94,7 +96,8 @@ struct tipc_media ib_media_info = {
 	.raw2addr	= tipc_ib_raw2addr,
 	.priority	= TIPC_DEF_LINK_PRI,
 	.tolerance	= TIPC_DEF_LINK_TOL,
-	.window		= TIPC_DEF_LINK_WIN,
+	.min_win	= TIPC_DEF_LINK_WIN,
+	.max_win	= TIPC_MAX_IB_LINK_WIN,
 	.type_id	= TIPC_MEDIA_TYPE_IB,
 	.hwaddr_len	= INFINIBAND_ALEN,
 	.name		= "ib"
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 038861ba..24ea942 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -163,7 +163,6 @@ struct tipc_link {
 		struct sk_buff *target_bskb;
 	} backlog[5];
 	u16 snd_nxt;
-	u16 window;

 	/* Reception */
 	u16 rcv_nxt;
@@ -174,6 +173,10 @@ struct tipc_link {

 	/* Congestion handling */
 	struct sk_buff_head wakeupq;
+	u16 window;
+	u16 min_win;
+	u16 max_win;
+	u16 cong_acks;

 	/* Fragmentation/reassembly */
 	struct sk_buff *reasm_buf;
@@ -307,9 +310,14 @@ u32 tipc_link_id(struct tipc_link *l)
 	return l->peer_bearer_id << 16 | l->bearer_id;
 }

-int tipc_link_window(struct tipc_link *l)
+int tipc_link_min_win(struct tipc_link *l)
+{
+	return l->min_win;
+}
+
+int tipc_link_max_win(struct tipc_link *l)
 {
-	return l->window;
+	return l->max_win;
 }

 int tipc_link_prio(struct tipc_link *l)
@@ -426,7 +434,8 @@ u32 tipc_link_state(struct tipc_link *l)
 * @net_plane: network plane (A,B,c..) this link belongs to
 * @mtu: mtu to be advertised by link
 * @priority: priority to be used by link
- * @window: send window to be used by link
+ * @min_win: minimal send window to be used by link
+ * @max_win: maximal send window to be used by link
 * @session: session to be used by link
 * @ownnode: identity of own node
 * @peer: node id of peer node
@@ -441,7 +450,7 @@ u32 tipc_link_state(struct tipc_link *l)
 */
 bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 		      int tolerance, char net_plane, u32 mtu, int priority,
-		      int window, u32 session, u32 self,
+		      u32 min_win, u32 max_win, u32 session, u32 self,
 		      u32 peer, u8 *peer_id, u16 peer_caps,
 		      struct tipc_link *bc_sndlink,
 		      struct tipc_link *bc_rcvlink,
@@ -485,7 +494,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 	l->advertised_mtu = mtu;
 	l->mtu = mtu;
 	l->priority = priority;
-	tipc_link_set_queue_limits(l, window);
+	tipc_link_set_queue_limits(l, min_win, max_win);
 	l->ackers = 1;
 	l->bc_sndlink = bc_sndlink;
 	l->bc_rcvlink = bc_rcvlink;
@@ -513,7 +522,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 * Returns true if link was created, otherwise false
 */
 bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
-			 int mtu, int window, u16 peer_caps,
+			 int mtu, u32 min_win, u32 max_win, u16 peer_caps,
 			 struct sk_buff_head *inputq,
 			 struct sk_buff_head *namedq,
 			 struct tipc_link *bc_sndlink,
@@ -521,9 +530,9 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
 {
 	struct tipc_link *l;

-	if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window,
-			      0, ownnode, peer, NULL, peer_caps, bc_sndlink,
-			      NULL, inputq, namedq, link))
+	if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, min_win,
+			      max_win, 0, ownnode, peer, NULL, peer_caps,
+			      bc_sndlink, NULL, inputq, namedq, link))
 		return false;

 	l = *link;
@@ -977,7 +986,8 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,

 	/* Prepare each packet for sending, and add to relevant queue: */
 	while ((skb = __skb_dequeue(list))) {
-		if (likely(skb_queue_len(transmq) < maxwin)) {
+		if (likely(skb_queue_empty(backlogq) &&
+			   skb_queue_len(transmq) < maxwin)) {
 			hdr = buf_msg(skb);
 			msg_set_seqno(hdr, seqno);
 			msg_set_ack(hdr, ack);
@@ -1028,6 +1038,8 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 static void tipc_link_advance_backlog(struct tipc_link *l,
 				      struct sk_buff_head *xmitq)
 {
+	struct sk_buff_head *txq = &l->transmq;
+	u16 qlen, add, cwin = l->window;
 	struct sk_buff *skb, *_skb;
 	struct tipc_msg *hdr;
 	u16 seqno = l->snd_nxt;
@@ -1035,7 +1047,7 @@ static void tipc_link_advance_backlog(struct tipc_link *l,
 	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
 	u32 imp;

-	while (skb_queue_len(&l->transmq) < l->window) {
+	while (skb_queue_len(txq) < cwin) {
 		skb = skb_peek(&l->backlogq);
 		if (!skb)
 			break;
@@ -1063,6 +1075,11 @@ static void tipc_link_advance_backlog(struct tipc_link *l,
 		seqno++;
 	}
 	l->snd_nxt = seqno;
+	qlen = skb_queue_len(txq);
+	if (qlen >= cwin && (l->snd_nxt - buf_seqno(skb_peek(txq)) == qlen)) {
+		add = l->cong_acks++ % 32 ? 0 : 1;
+		l->window = min_t(u16, cwin + add, l->max_win);
+	}
 }

 /**
@@ -1407,7 +1424,9 @@ static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
 	struct sk_buff *skb, *_skb, *tmp;
 	struct tipc_msg *hdr;
 	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+	bool retransmitted = false;
 	u16 ack = l->rcv_nxt - 1;
+	u16 cwin = l->window;
 	bool passed = false;
 	u16 seqno, n = 0;
 	int rc = 0;
@@ -1440,7 +1459,7 @@ static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
 			_skb->priority = TC_PRIO_CONTROL;
 			__skb_queue_tail(xmitq, _skb);
 			l->stats.retransmitted++;
-
+			retransmitted = true;
 			/* Increase actual retrans counter & mark first time */
 			if (!TIPC_SKB_CB(skb)->retr_cnt++)
 				TIPC_SKB_CB(skb)->retr_stamp = jiffies;
@@ -1454,7 +1473,8 @@ static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
 				goto next_gap_ack;
 		}
 	}
-
+	if (retransmitted)
+		l->window = TIPC_DEF_LINK_WIN + (cwin - l->min_win) / 2;
 	return 0;
 }

@@ -2297,15 +2317,17 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
 	return 0;
 }

-void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
+void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win)
 {
 	int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE);

-	l->window = win;
-	l->backlog[TIPC_LOW_IMPORTANCE].limit      = max_t(u16, 50, win);
-	l->backlog[TIPC_MEDIUM_IMPORTANCE].limit   = max_t(u16, 100, win * 2);
-	l->backlog[TIPC_HIGH_IMPORTANCE].limit     = max_t(u16, 150, win * 3);
-	l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4);
+	l->window = min_win;
+	l->min_win = min_win;
+	l->max_win = max_win;
+	l->backlog[TIPC_LOW_IMPORTANCE].limit      = min_win * 2;
+	l->backlog[TIPC_MEDIUM_IMPORTANCE].limit   = min_win * 4;
+	l->backlog[TIPC_HIGH_IMPORTANCE].limit     = min_win * 6;
+	l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = min_win * 8;
 	l->backlog[TIPC_SYSTEM_IMPORTANCE].limit   = max_bulk;
 }

@@ -2358,10 +2380,10 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[])
 	}

 	if (props[TIPC_NLA_PROP_WIN]) {
-		u32 win;
+		u32 max_win;

-		win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
-		if ((win < TIPC_MIN_LINK_WIN) || (win > TIPC_MAX_LINK_WIN))
+		max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+		if (max_win < TIPC_MIN_LINK_WIN || max_win > TIPC_MAX_LINK_WIN)
 			return -EINVAL;
 	}

@@ -2597,7 +2619,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
 	prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP);
 	if (!prop)
 		goto attr_msg_full;
-	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window))
+	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->max_win))
 		goto prop_msg_full;
 	if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode))
 		goto prop_msg_full;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index adcad65..caed071 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -73,7 +73,7 @@ enum {

 bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 		      int tolerance, char net_plane, u32 mtu, int priority,
-		      int window, u32 session, u32 ownnode,
+		      u32 min_win, u32 max_win, u32 session, u32 ownnode,
 		      u32 peer, u8 *peer_id, u16 peer_caps,
 		      struct tipc_link *bc_sndlink,
 		      struct tipc_link *bc_rcvlink,
@@ -81,7 +81,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 		      struct sk_buff_head *namedq,
 		      struct tipc_link **link);
 bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
-			 int mtu, int window, u16 peer_caps,
+			 int mtu, u32 min_win, u32 max_win, u16 peer_caps,
 			 struct sk_buff_head *inputq,
 			 struct sk_buff_head *namedq,
 			 struct tipc_link
*bc_sndlink, @@ -115,7 +115,8 @@ char *tipc_link_name_ext(struct tipc_link *l, char *buf); u32 tipc_link_state(struct tipc_link *l); char tipc_link_plane(struct tipc_link *l); int tipc_link_prio(struct tipc_link *l); -int tipc_link_window(struct tipc_link *l); +int tipc_link_min_win(struct tipc_link *l); +int tipc_link_max_win(struct tipc_link *l); void tipc_link_update_caps(struct tipc_link *l, u16 capabilities); bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr); unsigned long tipc_link_tolerance(struct tipc_link *l); @@ -124,7 +125,7 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, void tipc_link_set_prio(struct tipc_link *l, u32 prio, struct sk_buff_head *xmitq); void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit); -void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); +void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win); int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_link *link, int nlflags); int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); diff --git a/net/tipc/node.c b/net/tipc/node.c index 4b60928..6e361a7 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -465,7 +465,8 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr, n->active_links[1] = INVALID_BEARER_ID; if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, U16_MAX, - tipc_link_window(tipc_bc_sndlink(net)), + tipc_link_min_win(tipc_bc_sndlink(net)), + tipc_link_max_win(tipc_bc_sndlink(net)), n->capabilities, &n->bc_entry.inputq1, &n->bc_entry.namedq, @@ -1134,7 +1135,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, get_random_bytes(&session, sizeof(u16)); if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, - b->window, session, + b->min_win, b->max_win, session, tipc_own_addr(net), addr, peer_id, n->capabilities, tipc_bc_sndlink(n->net), n->bc_entry.link, @@ -2258,10 +2259,12 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) tipc_link_set_prio(link, prio, &xmitq); } if (props[TIPC_NLA_PROP_WIN]) { - u32 win; + u32 max_win; - win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - tipc_link_set_queue_limits(link, win); + max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + tipc_link_set_queue_limits(link, + tipc_link_min_win(link), + max_win); } } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 43ca5fd..7bcc79a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -824,7 +824,8 @@ struct tipc_media udp_media_info = { .msg2addr = tipc_udp_msg2addr, .priority = TIPC_DEF_LINK_PRI, .tolerance = TIPC_DEF_LINK_TOL, - .window = TIPC_DEF_LINK_WIN, + .min_win = TIPC_DEF_LINK_WIN, + .max_win = TIPC_DEF_LINK_WIN, .mtu = TIPC_DEF_LINK_UDP_MTU, .type_id = TIPC_MEDIA_TYPE_UDP, .hwaddr_len = 0, -- 2.1.4 |
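The essence of the link.c changes above is that the fixed send window becomes adaptive: each time the transmit queue has been refilled to capacity, every 32nd such event grows the window by one packet (capped at max_win), while any retransmission shrinks it back toward the default. Below is a minimal userspace model of those two rules, not the kernel code itself; the constants are illustrative stand-ins for the link's min_win/max_win and TIPC_DEF_LINK_WIN:

/* Model of the additive-increase / fallback-decrease window logic. */
#include <stdio.h>

#define MIN_WIN 50   /* e.g. the new TIPC_MIN_ETH_LINK_WIN */
#define MAX_WIN 500  /* e.g. the new TIPC_MAX_ETH_LINK_WIN */
#define DEF_WIN 50   /* stands in for TIPC_DEF_LINK_WIN */

struct model_link {
	unsigned short window;    /* current send window (cwin) */
	unsigned short cong_acks; /* events counted while congested */
};

/* tipc_link_advance_backlog() path: the transmit queue is full again,
 * so grow the window by one on every 32nd such event (slow increase).
 */
static void on_queue_refilled(struct model_link *l)
{
	unsigned short add = (l->cong_acks++ % 32) ? 0 : 1;
	unsigned int next = l->window + add;

	l->window = next > MAX_WIN ? MAX_WIN : next;
}

/* tipc_link_advance_transmq() path: a gap forced retransmission,
 * so fall back halfway between the default and the current window.
 */
static void on_retransmit(struct model_link *l)
{
	l->window = DEF_WIN + (l->window - MIN_WIN) / 2;
}

int main(void)
{
	struct model_link l = { .window = MIN_WIN, .cong_acks = 0 };
	int i;

	for (i = 0; i < 3200; i++)
		on_queue_refilled(&l);  /* 100 increments: 50 -> 150 */
	printf("after ramp-up: %hu\n", l.window);
	on_retransmit(&l);              /* 50 + (150 - 50) / 2 = 100 */
	printf("after loss:    %hu\n", l.window);
	return 0;
}

Note also the subtle change in tipc_link_xmit(): packets now go straight to the transmit queue only while the backlog queue is empty, so that new packets cannot overtake ones already waiting in the backlog.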
From: Tuong L. <tuo...@de...> - 2019-11-08 05:05:42
This commit adds two netlink commands to TIPC in order for user to be able to set or remove AEAD keys: - TIPC_NL_KEY_SET - TIPC_NL_KEY_FLUSH When the 'KEY_SET' is given along with the key data, the key will be initiated and attached to TIPC crypto. On the other hand, the 'KEY_FLUSH' command will remove all existing keys if any. Acked-by: Ying Xue <yin...@wi...> Acked-by: Jon Maloy <jon...@er...> Signed-off-by: Tuong Lien <tuo...@de...> --- include/uapi/linux/tipc_netlink.h | 4 ++ net/tipc/netlink.c | 18 ++++- net/tipc/node.c | 135 ++++++++++++++++++++++++++++++++++++++ net/tipc/node.h | 4 ++ 4 files changed, 160 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index efb958fd167d..6c2194ab745b 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -63,6 +63,8 @@ enum { TIPC_NL_PEER_REMOVE, TIPC_NL_BEARER_ADD, TIPC_NL_UDP_GET_REMOTEIP, + TIPC_NL_KEY_SET, + TIPC_NL_KEY_FLUSH, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -160,6 +162,8 @@ enum { TIPC_NLA_NODE_UNSPEC, TIPC_NLA_NODE_ADDR, /* u32 */ TIPC_NLA_NODE_UP, /* flag */ + TIPC_NLA_NODE_ID, /* data */ + TIPC_NLA_NODE_KEY, /* data */ __TIPC_NLA_NODE_MAX, TIPC_NLA_NODE_MAX = __TIPC_NLA_NODE_MAX - 1 diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index d32bbd0f5e46..e53231bd23b4 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -102,7 +102,11 @@ const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = { [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 }, - [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG } + [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }, + [TIPC_NLA_NODE_ID] = { .type = NLA_BINARY, + .len = TIPC_NODEID_LEN}, + [TIPC_NLA_NODE_KEY] = { .type = NLA_BINARY, + .len = TIPC_AEAD_KEY_SIZE_MAX}, }; /* Properties valid for media, bearer and link */ @@ -257,6 +261,18 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .dumpit = tipc_udp_nl_dump_remoteip, }, #endif +#ifdef CONFIG_TIPC_CRYPTO + { + .cmd = TIPC_NL_KEY_SET, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_node_set_key, + }, + { + .cmd = TIPC_NL_KEY_FLUSH, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = tipc_nl_node_flush_key, + }, +#endif }; struct genl_family tipc_genl_family __ro_after_init = { diff --git a/net/tipc/node.c b/net/tipc/node.c index d8bf2c179562..aaf595613e6e 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2760,6 +2760,141 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, return skb->len; } +#ifdef CONFIG_TIPC_CRYPTO +static int tipc_nl_retrieve_key(struct nlattr **attrs, + struct tipc_aead_key **key) +{ + struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY]; + + if (!attr) + return -ENODATA; + + *key = (struct tipc_aead_key *)nla_data(attr); + if (nla_len(attr) < tipc_aead_key_size(*key)) + return -EINVAL; + + return 0; +} + +static int tipc_nl_retrieve_nodeid(struct nlattr **attrs, u8 **node_id) +{ + struct nlattr *attr = attrs[TIPC_NLA_NODE_ID]; + + if (!attr) + return -ENODATA; + + if (nla_len(attr) < TIPC_NODEID_LEN) + return -EINVAL; + + *node_id = (u8 *)nla_data(attr); + return 0; +} + +int __tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[TIPC_NLA_NODE_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); + struct tipc_node *n = NULL; + struct tipc_aead_key 
*ukey; + struct tipc_crypto *c; + u8 *id, *own_id; + int rc = 0; + + if (!info->attrs[TIPC_NLA_NODE]) + return -EINVAL; + + rc = nla_parse_nested(attrs, TIPC_NLA_NODE_MAX, + info->attrs[TIPC_NLA_NODE], + tipc_nl_node_policy, info->extack); + if (rc) + goto exit; + + own_id = tipc_own_id(net); + if (!own_id) { + rc = -EPERM; + goto exit; + } + + rc = tipc_nl_retrieve_key(attrs, &ukey); + if (rc) + goto exit; + + rc = tipc_aead_key_validate(ukey); + if (rc) + goto exit; + + rc = tipc_nl_retrieve_nodeid(attrs, &id); + switch (rc) { + case -ENODATA: + /* Cluster key mode */ + rc = tipc_crypto_key_init(tn->crypto_tx, ukey, CLUSTER_KEY); + break; + case 0: + /* Per-node key mode */ + if (!memcmp(id, own_id, NODE_ID_LEN)) { + c = tn->crypto_tx; + } else { + n = tipc_node_find_by_id(net, id) ?: + tipc_node_create(net, 0, id, 0xffffu, 0, true); + if (unlikely(!n)) { + rc = -ENOMEM; + break; + } + c = n->crypto_rx; + } + + rc = tipc_crypto_key_init(c, ukey, PER_NODE_KEY); + if (n) + tipc_node_put(n); + break; + default: + break; + } + +exit: + return (rc < 0) ? rc : 0; +} + +int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_node_set_key(skb, info); + rtnl_unlock(); + + return err; +} + +int __tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); + struct tipc_node *n; + + tipc_crypto_key_flush(tn->crypto_tx); + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) + tipc_crypto_key_flush(n->crypto_rx); + rcu_read_unlock(); + + pr_info("All keys are flushed!\n"); + return 0; +} + +int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_node_flush_key(skb, info); + rtnl_unlock(); + + return err; +} +#endif + /** * tipc_node_dump - dump TIPC node data * @n: tipc node to be dumped diff --git a/net/tipc/node.h b/net/tipc/node.h index 1a15cf82cb11..a6803b449a2c 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -119,5 +119,9 @@ int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, struct netlink_callback *cb); +#ifdef CONFIG_TIPC_CRYPTO +int tipc_nl_node_set_key(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_flush_key(struct sk_buff *skb, struct genl_info *info); +#endif void tipc_node_pre_cleanup_net(struct net *exit_net); #endif -- 2.13.7 |
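For completeness: the TIPC_NLA_NODE_KEY attribute carries a struct tipc_aead_key blob whose trailing key material is laid out as [AES key][4-byte salt], which is what tipc_aead_key_validate() (see the companion crypto patch below) enforces, i.e. keylen minus the salt must be 16, 24 or 32 bytes. A hypothetical userspace helper for assembling such a blob could look as follows; the struct layout is assumed from the companion uapi header (include/uapi/linux/tipc.h), which is not part of this diff:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TIPC_AEAD_ALG_NAME	32	/* assumed from uapi tipc.h */
#define TIPC_AES_GCM_SALT_SIZE	4

struct tipc_aead_key {
	char alg_name[TIPC_AEAD_ALG_NAME];
	unsigned int keylen;		/* key incl. salt, in bytes */
	char key[];
};

/* Assemble the blob to be nested as TIPC_NLA_NODE_KEY inside
 * TIPC_NLA_NODE in a TIPC_NL_KEY_SET request.
 */
static struct tipc_aead_key *build_aead_key(const void *aes_key,
					    size_t aes_len, /* 16, 24, 32 */
					    const void *salt)
{
	size_t keylen = aes_len + TIPC_AES_GCM_SALT_SIZE;
	struct tipc_aead_key *k = calloc(1, sizeof(*k) + keylen);

	if (!k)
		return NULL;
	strncpy(k->alg_name, "gcm(aes)", sizeof(k->alg_name) - 1);
	k->keylen = keylen;
	memcpy(k->key, aes_key, aes_len);
	memcpy(k->key + aes_len, salt, TIPC_AES_GCM_SALT_SIZE);
	return k;
}

int main(void)
{
	unsigned char aes[16] = { 0 };	/* all-zero demo key only! */
	unsigned char salt[4] = { 1, 2, 3, 4 };
	struct tipc_aead_key *k = build_aead_key(aes, sizeof(aes), salt);

	if (k)
		printf("alg=%s keylen=%u\n", k->alg_name, k->keylen);
	free(k);
	return 0;
}

Omitting TIPC_NLA_NODE_ID then selects cluster key mode, while supplying a node id selects per-node mode, exactly as the switch in __tipc_nl_node_set_key() above distinguishes the -ENODATA and 0 cases.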
From: Tuong L. <tuo...@de...> - 2019-11-08 05:05:39
This commit offers an option to encrypt and authenticate all messaging, including the neighbor discovery messages. The currently most advanced supported algorithm is AEAD AES-GCM (as in IPsec or TLS). All encryption/decryption is done at the bearer layer, just before leaving or after entering TIPC.

Supported features:
- Encryption & authentication of all TIPC messages (header + data);
- Two symmetric-key modes: Cluster and Per-node;
- Automatic key switching;
- Key revocation on expiry (i.e. when the sequence number wraps);
- Lock-free encryption/decryption (RCU);
- Asynchronous crypto, Intel AES-NI supported;
- Multiple cipher transforms;
- Logs & statistics;

Two key modes:
- Cluster key mode: One single key is used for both TX & RX in all nodes in the cluster.
- Per-node key mode: Each node in the cluster has one specific TX key. For RX, a node requires its peers' TX keys to be able to decrypt the messages from those peers.

Key setting from user-space is performed via netlink by a user program (e.g. the iproute2 'tipc' tool).

Internal key state machine:

                               Attach    Align(RX)
                                   +-+   +-+
                                   | V   | V
  +---------+      Attach     +---------+
  |  IDLE   |---------------->| PENDING |(user = 0)
  +---------+                 +---------+
      A   A                    Switch|  A
      |   |                          |  |
      |   | Free(switch/revoked)     |  |
  (Free)|  +----------------------+  |  |Timeout
      |              (TX)         |  |  |(RX)
      |                           |  |  |
      |                           |  v  |
  +---------+      Switch     +---------+
  | PASSIVE |<----------------|  ACTIVE |
  +---------+       (RX)      +---------+
  (user = 1)                  (user >= 1)

The number of TFMs is 10 by default and can be changed via the procfs 'net/tipc/max_tfms'. For simplicity, this file is at the moment also used to print the crypto statistics at runtime:

echo 0xfff1 > /proc/sys/net/tipc/max_tfms

The patch defines a new TIPC version (v7) for the encryption message, so backward compatibility is preserved as well. The message is basically encapsulated as follows:

  +----------------------------------------------------------+
  | TIPCv7 encryption | Original TIPCv2    | Authentication  |
  |       header      | packet (encrypted) |      Tag        |
  +----------------------------------------------------------+

Compared with the non-encrypted case, throughput is about ~40% for small messages and ~9% for large messages. With hardware crypto support, i.e. the Intel AES-NI CPU instructions, throughput increases up to ~85% for small messages and ~55% for large messages.

By default, the new feature is inactive (i.e. no encryption) until the user sets a key for TIPC. There is, however, also a new option, "TIPC_CRYPTO", in the kernel configuration, so the new code can be enabled or disabled when needed.
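One detail worth fixing in mind before reading the diff: the AES-GCM nonce. The encrypt and decrypt paths below build a 12-byte IV as [4-byte SALT][8-byte sequence number], where the key's salt is XOR-ed with the source address in cluster key mode (presumably so that nodes sharing one key do not generate colliding nonces) or with the destination address in per-node mode when the peer is known. A standalone sketch of that construction follows; this is a userspace model, and the 12-byte size is inferred from the two copies in tipc_aead_encrypt():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Build the GCM IV: [SALT ^ addr (4B)][SEQNO (8B)]. Both 'addr' and
 * 'seqno' are assumed to be in network byte order already, matching
 * the __be32/__be64 fields of the encryption header.
 */
static void tipc_gcm_iv(uint8_t iv[12], uint32_t salt,
			uint32_t addr_be, uint64_t seqno_be)
{
	uint32_t s = salt ^ addr_be;

	memcpy(iv, &s, sizeof(s));
	memcpy(iv + 4, &seqno_be, sizeof(seqno_be));
}

int main(void)
{
	uint8_t iv[12];
	int i;

	/* 0x01001001 is TIPC address <1.1.1> as a raw u32 */
	tipc_gcm_iv(iv, 0xdeadbeef, 0x01001001, 0x0102030405060708ULL);
	for (i = 0; i < 12; i++)
		printf("%02x", iv[i]);
	printf("\n");
	return 0;
}

Since the sequence number is strictly increasing per key, and the key is revoked when it wraps (see tipc_ehdr_build()), this keeps the (key, nonce) pair unique, which AES-GCM requires.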
MAINTAINERS | add two new files 'crypto.h' & 'crypto.c' in tipc Acked-by: Ying Xue <yin...@wi...> Acked-by: Jon Maloy <jon...@er...> Signed-off-by: Tuong Lien <tuo...@de...> --- net/tipc/Kconfig | 15 + net/tipc/Makefile | 1 + net/tipc/bcast.c | 2 +- net/tipc/bearer.c | 35 +- net/tipc/bearer.h | 3 +- net/tipc/core.c | 14 + net/tipc/core.h | 8 + net/tipc/crypto.c | 1986 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/tipc/crypto.h | 167 +++++ net/tipc/link.c | 19 +- net/tipc/link.h | 1 + net/tipc/msg.c | 15 +- net/tipc/msg.h | 46 +- net/tipc/node.c | 99 ++- net/tipc/node.h | 8 + net/tipc/sysctl.c | 11 + net/tipc/udp_media.c | 1 + 17 files changed, 2385 insertions(+), 46 deletions(-) create mode 100644 net/tipc/crypto.c create mode 100644 net/tipc/crypto.h diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index b83e16ade4d2..716b61a701a8 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -35,6 +35,21 @@ config TIPC_MEDIA_UDP Saying Y here will enable support for running TIPC over IP/UDP bool default y +config TIPC_CRYPTO + bool "TIPC encryption support" + depends on TIPC + select CRYPTO + select CRYPTO_AES + select CRYPTO_GCM + help + Saying Y here will enable support for TIPC encryption. + All TIPC messages will be encrypted/decrypted by using the currently most + advanced algorithm: AEAD AES-GCM (like IPSec or TLS) before leaving/ + entering the TIPC stack. + Key setting from user-space is performed via netlink by a user program + (e.g. the iproute2 'tipc' tool). + bool + default y config TIPC_DIAG tristate "TIPC: socket monitoring interface" diff --git a/net/tipc/Makefile b/net/tipc/Makefile index c86aba0282af..11255e970dd4 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -16,6 +16,7 @@ CFLAGS_trace.o += -I$(src) tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o tipc-$(CONFIG_SYSCTL) += sysctl.o +tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o obj-$(CONFIG_TIPC_DIAG) += diag.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 6ef1abdd525f..f41096a759fa 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -84,7 +84,7 @@ static struct tipc_bc_base *tipc_bc_base(struct net *net) */ int tipc_bcast_get_mtu(struct net *net) { - return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE; + return tipc_link_mss(tipc_bc_sndlink(net)); } void tipc_bcast_disable_rcast(struct net *net) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6e15b9b1f1ef..d7ec26bd739d 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -44,6 +44,7 @@ #include "netlink.h" #include "udp_media.h" #include "trace.h" +#include "crypto.h" #define MAX_ADDR_STR 60 @@ -516,10 +517,15 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, rcu_read_lock(); b = bearer_get(net, bearer_id); - if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) - b->media->send_msg(net, skb, b, dest); - else + if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) { +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_xmit(net, &skb, b, dest, NULL); + if (skb) +#endif + b->media->send_msg(net, skb, b, dest); + } else { kfree_skb(skb); + } rcu_read_unlock(); } @@ -527,7 +533,8 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, */ void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, - struct tipc_media_addr *dst) + struct tipc_media_addr *dst, + struct tipc_node *__dnode) { struct tipc_bearer *b; struct sk_buff *skb, *tmp; @@ -541,10 +548,15 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, __skb_queue_purge(xmitq); 
skb_queue_walk_safe(xmitq, skb, tmp) { __skb_dequeue(xmitq); - if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) - b->media->send_msg(net, skb, b, dst); - else + if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) { +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_xmit(net, &skb, b, dst, __dnode); + if (skb) +#endif + b->media->send_msg(net, skb, b, dst); + } else { kfree_skb(skb); + } } rcu_read_unlock(); } @@ -555,6 +567,7 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq) { struct tipc_net *tn = tipc_net(net); + struct tipc_media_addr *dst; int net_id = tn->net_id; struct tipc_bearer *b; struct sk_buff *skb, *tmp; @@ -569,7 +582,12 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, msg_set_non_seq(hdr, 1); msg_set_mc_netid(hdr, net_id); __skb_dequeue(xmitq); - b->media->send_msg(net, skb, b, &b->bcast_addr); + dst = &b->bcast_addr; +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_xmit(net, &skb, b, dst, NULL); + if (skb) +#endif + b->media->send_msg(net, skb, b, dst); } rcu_read_unlock(); } @@ -596,6 +614,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, if (likely(b && test_bit(0, &b->up) && (skb->pkt_type <= PACKET_MULTICAST))) { skb_mark_not_on_list(skb); + TIPC_SKB_CB(skb)->flags = 0; tipc_rcv(dev_net(b->pt.dev), skb, b); rcu_read_unlock(); return NET_RX_SUCCESS; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index faca696d422f..d0c79cc6c0c2 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -232,7 +232,8 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, struct tipc_media_addr *dest); void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, - struct tipc_media_addr *dst); + struct tipc_media_addr *dst, + struct tipc_node *__dnode); void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq); void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts); diff --git a/net/tipc/core.c b/net/tipc/core.c index ab648dd150ee..fc01a13d7462 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -44,6 +44,7 @@ #include "socket.h" #include "bcast.h" #include "node.h" +#include "crypto.h" #include <linux/module.h> @@ -68,6 +69,11 @@ static int __net_init tipc_init_net(struct net *net) INIT_LIST_HEAD(&tn->node_list); spin_lock_init(&tn->node_list_lock); +#ifdef CONFIG_TIPC_CRYPTO + err = tipc_crypto_start(&tn->crypto_tx, net, NULL); + if (err) + goto out_crypto; +#endif err = tipc_sk_rht_init(net); if (err) goto out_sk_rht; @@ -93,6 +99,11 @@ static int __net_init tipc_init_net(struct net *net) out_nametbl: tipc_sk_rht_destroy(net); out_sk_rht: + +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_stop(&tn->crypto_tx); +out_crypto: +#endif return err; } @@ -103,6 +114,9 @@ static void __net_exit tipc_exit_net(struct net *net) tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); +#ifdef CONFIG_TIPC_CRYPTO + tipc_crypto_stop(&tipc_net(net)->crypto_tx); +#endif } static void __net_exit tipc_pernet_pre_exit(struct net *net) diff --git a/net/tipc/core.h b/net/tipc/core.h index 8776d32a4a47..775848a5f27e 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -68,6 +68,9 @@ struct tipc_link; struct tipc_name_table; struct tipc_topsrv; struct tipc_monitor; +#ifdef CONFIG_TIPC_CRYPTO +struct tipc_crypto; +#endif #define TIPC_MOD_VER "2.0.0" @@ -129,6 +132,11 @@ struct tipc_net { /* Tracing of node internal messages */ struct packet_type loopback_pt; + +#ifdef CONFIG_TIPC_CRYPTO + /* TX crypto handler */ + struct tipc_crypto *crypto_tx; 
+#endif }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c new file mode 100644 index 000000000000..05f7ca76e8ce --- /dev/null +++ b/net/tipc/crypto.c @@ -0,0 +1,1986 @@ +// SPDX-License-Identifier: GPL-2.0 +/** + * net/tipc/crypto.c: TIPC crypto for key handling & packet en/decryption + * + * Copyright (c) 2019, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <crypto/aead.h> +#include <crypto/aes.h> +#include "crypto.h" + +#define TIPC_TX_PROBE_LIM msecs_to_jiffies(1000) /* > 1s */ +#define TIPC_TX_LASTING_LIM msecs_to_jiffies(120000) /* 2 mins */ +#define TIPC_RX_ACTIVE_LIM msecs_to_jiffies(3000) /* 3s */ +#define TIPC_RX_PASSIVE_LIM msecs_to_jiffies(180000) /* 3 mins */ +#define TIPC_MAX_TFMS_DEF 10 +#define TIPC_MAX_TFMS_LIM 1000 + +/** + * TIPC Key ids + */ +enum { + KEY_UNUSED = 0, + KEY_MIN, + KEY_1 = KEY_MIN, + KEY_2, + KEY_3, + KEY_MAX = KEY_3, +}; + +/** + * TIPC Crypto statistics + */ +enum { + STAT_OK, + STAT_NOK, + STAT_ASYNC, + STAT_ASYNC_OK, + STAT_ASYNC_NOK, + STAT_BADKEYS, /* tx only */ + STAT_BADMSGS = STAT_BADKEYS, /* rx only */ + STAT_NOKEYS, + STAT_SWITCHES, + + MAX_STATS, +}; + +/* TIPC crypto statistics' header */ +static const char *hstats[MAX_STATS] = {"ok", "nok", "async", "async_ok", + "async_nok", "badmsgs", "nokeys", + "switches"}; + +/* Max TFMs number per key */ +int sysctl_tipc_max_tfms __read_mostly = TIPC_MAX_TFMS_DEF; + +/** + * struct tipc_key - TIPC keys' status indicator + * + * 7 6 5 4 3 2 1 0 + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * key: | (reserved)|passive idx| active idx|pending idx| + * +-----+-----+-----+-----+-----+-----+-----+-----+ + */ +struct tipc_key { +#define KEY_BITS (2) +#define KEY_MASK ((1 << KEY_BITS) - 1) + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 pending:2, + active:2, + passive:2, /* rx only */ + reserved:2; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 reserved:2, + passive:2, /* rx only */ + active:2, + pending:2; +#else +#error "Please fix <asm/byteorder.h>" +#endif + } __packed; + u8 keys; + }; +}; + +/** + * struct tipc_tfm - TIPC TFM structure to form a list of TFMs + */ +struct tipc_tfm { + struct crypto_aead *tfm; + struct list_head list; +}; + +/** + * struct tipc_aead - TIPC AEAD key structure + * @tfm_entry: per-cpu pointer to one entry in TFM list + * @crypto: TIPC crypto owns this key + * @cloned: reference to the source key in case cloning + * @users: the number of the key users (TX/RX) + * @salt: the key's SALT value + * @authsize: authentication tag size (max = 16) + * @mode: crypto mode is applied to the key + * @hint[]: a hint for user key + * @rcu: struct rcu_head + * @seqno: the key seqno (cluster scope) + * @refcnt: the key reference counter + */ +struct tipc_aead { +#define TIPC_AEAD_HINT_LEN (5) + struct tipc_tfm * __percpu *tfm_entry; + struct tipc_crypto *crypto; + struct tipc_aead *cloned; + atomic_t users; + u32 salt; + u8 authsize; + u8 mode; + char hint[TIPC_AEAD_HINT_LEN + 1]; + struct rcu_head rcu; + + atomic64_t seqno ____cacheline_aligned; + refcount_t refcnt ____cacheline_aligned; + +} ____cacheline_aligned; + +/** + * struct tipc_crypto_stats - TIPC Crypto statistics + */ +struct tipc_crypto_stats { + unsigned int stat[MAX_STATS]; +}; + +/** + * struct tipc_crypto - TIPC TX/RX crypto structure + * @net: struct net + * @node: TIPC node (RX) + * @aead: array of pointers to AEAD keys for encryption/decryption + * @peer_rx_active: replicated peer RX active key index + * @key: the key states + * @working: the crypto is working or not + * @stats: the crypto statistics + * @sndnxt: the per-peer sndnxt (TX) + * @timer1: general timer 1 (jiffies) + * @timer2: general timer 1 (jiffies) + * @lock: tipc_key lock + */ +struct tipc_crypto { + struct net *net; + struct tipc_node *node; + struct tipc_aead __rcu *aead[KEY_MAX + 1]; /* key[0] is UNUSED */ + atomic_t peer_rx_active; + struct tipc_key key; + u8 
working:1; + struct tipc_crypto_stats __percpu *stats; + + atomic64_t sndnxt ____cacheline_aligned; + unsigned long timer1; + unsigned long timer2; + spinlock_t lock; /* crypto lock */ + +} ____cacheline_aligned; + +/* struct tipc_crypto_tx_ctx - TX context for callbacks */ +struct tipc_crypto_tx_ctx { + struct tipc_aead *aead; + struct tipc_bearer *bearer; + struct tipc_media_addr dst; +}; + +/* struct tipc_crypto_rx_ctx - RX context for callbacks */ +struct tipc_crypto_rx_ctx { + struct tipc_aead *aead; + struct tipc_bearer *bearer; +}; + +static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead); +static inline void tipc_aead_put(struct tipc_aead *aead); +static void tipc_aead_free(struct rcu_head *rp); +static int tipc_aead_users(struct tipc_aead __rcu *aead); +static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim); +static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim); +static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val); +static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead); +static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, + u8 mode); +static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src); +static void *tipc_aead_mem_alloc(struct crypto_aead *tfm, + unsigned int crypto_ctx_size, + u8 **iv, struct aead_request **req, + struct scatterlist **sg, int nsg); +static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *dst, + struct tipc_node *__dnode); +static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err); +static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, + struct sk_buff *skb, struct tipc_bearer *b); +static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err); +static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr); +static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead, + u8 tx_key, struct sk_buff *skb, + struct tipc_crypto *__rx); +static inline void tipc_crypto_key_set_state(struct tipc_crypto *c, + u8 new_passive, + u8 new_active, + u8 new_pending); +static int tipc_crypto_key_attach(struct tipc_crypto *c, + struct tipc_aead *aead, u8 pos); +static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending); +static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx, + struct tipc_crypto *rx, + struct sk_buff *skb); +static void tipc_crypto_key_synch(struct tipc_crypto *rx, u8 new_rx_active, + struct tipc_msg *hdr); +static int tipc_crypto_key_revoke(struct net *net, u8 tx_key); +static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead, + struct tipc_bearer *b, + struct sk_buff **skb, int err); +static void tipc_crypto_do_cmd(struct net *net, int cmd); +static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf); +#ifdef TIPC_CRYPTO_DEBUG +static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new, + char *buf); +#endif + +#define key_next(cur) ((cur) % KEY_MAX + 1) + +#define tipc_aead_rcu_ptr(rcu_ptr, lock) \ + rcu_dereference_protected((rcu_ptr), lockdep_is_held(lock)) + +#define tipc_aead_rcu_swap(rcu_ptr, ptr, lock) \ + rcu_swap_protected((rcu_ptr), (ptr), lockdep_is_held(lock)) + +#define tipc_aead_rcu_replace(rcu_ptr, ptr, lock) \ +do { \ + typeof(rcu_ptr) __tmp = rcu_dereference_protected((rcu_ptr), \ + lockdep_is_held(lock)); \ + rcu_assign_pointer((rcu_ptr), (ptr)); \ + tipc_aead_put(__tmp); \ +} while (0) + +#define 
tipc_crypto_key_detach(rcu_ptr, lock) \ + tipc_aead_rcu_replace((rcu_ptr), NULL, lock) + +/** + * tipc_aead_key_validate - Validate a AEAD user key + */ +int tipc_aead_key_validate(struct tipc_aead_key *ukey) +{ + int keylen; + + /* Check if algorithm exists */ + if (unlikely(!crypto_has_alg(ukey->alg_name, 0, 0))) { + pr_info("Not found cipher: \"%s\"!\n", ukey->alg_name); + return -ENODEV; + } + + /* Currently, we only support the "gcm(aes)" cipher algorithm */ + if (strcmp(ukey->alg_name, "gcm(aes)")) + return -ENOTSUPP; + + /* Check if key size is correct */ + keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE; + if (unlikely(keylen != TIPC_AES_GCM_KEY_SIZE_128 && + keylen != TIPC_AES_GCM_KEY_SIZE_192 && + keylen != TIPC_AES_GCM_KEY_SIZE_256)) + return -EINVAL; + + return 0; +} + +static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead) +{ + struct tipc_aead *tmp; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (unlikely(!tmp || !refcount_inc_not_zero(&tmp->refcnt))) + tmp = NULL; + rcu_read_unlock(); + + return tmp; +} + +static inline void tipc_aead_put(struct tipc_aead *aead) +{ + if (aead && refcount_dec_and_test(&aead->refcnt)) + call_rcu(&aead->rcu, tipc_aead_free); +} + +/** + * tipc_aead_free - Release AEAD key incl. all the TFMs in the list + * @rp: rcu head pointer + */ +static void tipc_aead_free(struct rcu_head *rp) +{ + struct tipc_aead *aead = container_of(rp, struct tipc_aead, rcu); + struct tipc_tfm *tfm_entry, *head, *tmp; + + if (aead->cloned) { + tipc_aead_put(aead->cloned); + } else { + head = *this_cpu_ptr(aead->tfm_entry); + list_for_each_entry_safe(tfm_entry, tmp, &head->list, list) { + crypto_free_aead(tfm_entry->tfm); + list_del(&tfm_entry->list); + kfree(tfm_entry); + } + /* Free the head */ + crypto_free_aead(head->tfm); + list_del(&head->list); + kfree(head); + } + free_percpu(aead->tfm_entry); + kfree(aead); +} + +static int tipc_aead_users(struct tipc_aead __rcu *aead) +{ + struct tipc_aead *tmp; + int users = 0; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) + users = atomic_read(&tmp->users); + rcu_read_unlock(); + + return users; +} + +static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim) +{ + struct tipc_aead *tmp; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) + atomic_add_unless(&tmp->users, 1, lim); + rcu_read_unlock(); +} + +static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim) +{ + struct tipc_aead *tmp; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) + atomic_add_unless(&rcu_dereference(aead)->users, -1, lim); + rcu_read_unlock(); +} + +static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val) +{ + struct tipc_aead *tmp; + int cur; + + rcu_read_lock(); + tmp = rcu_dereference(aead); + if (tmp) { + do { + cur = atomic_read(&tmp->users); + if (cur == val) + break; + } while (atomic_cmpxchg(&tmp->users, cur, val) != cur); + } + rcu_read_unlock(); +} + +/** + * tipc_aead_tfm_next - Move TFM entry to the next one in list and return it + */ +static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead) +{ + struct tipc_tfm **tfm_entry = this_cpu_ptr(aead->tfm_entry); + + *tfm_entry = list_next_entry(*tfm_entry, list); + return (*tfm_entry)->tfm; +} + +/** + * tipc_aead_init - Initiate TIPC AEAD + * @aead: returned new TIPC AEAD key handle pointer + * @ukey: pointer to user key data + * @mode: the key mode + * + * Allocate a (list of) new cipher transformation (TFM) with the specific user + * key data if valid. 
The number of the allocated TFMs can be set via the sysfs + * "net/tipc/max_tfms" first. + * Also, all the other AEAD data are also initialized. + * + * Return: 0 if the initiation is successful, otherwise: < 0 + */ +static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, + u8 mode) +{ + struct tipc_tfm *tfm_entry, *head; + struct crypto_aead *tfm; + struct tipc_aead *tmp; + int keylen, err, cpu; + int tfm_cnt = 0; + + if (unlikely(*aead)) + return -EEXIST; + + /* Allocate a new AEAD */ + tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); + if (unlikely(!tmp)) + return -ENOMEM; + + /* The key consists of two parts: [AES-KEY][SALT] */ + keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE; + + /* Allocate per-cpu TFM entry pointer */ + tmp->tfm_entry = alloc_percpu(struct tipc_tfm *); + if (!tmp->tfm_entry) { + kzfree(tmp); + return -ENOMEM; + } + + /* Make a list of TFMs with the user key data */ + do { + tfm = crypto_alloc_aead(ukey->alg_name, 0, 0); + if (IS_ERR(tfm)) { + err = PTR_ERR(tfm); + break; + } + + if (unlikely(!tfm_cnt && + crypto_aead_ivsize(tfm) != TIPC_AES_GCM_IV_SIZE)) { + crypto_free_aead(tfm); + err = -ENOTSUPP; + break; + } + + err |= crypto_aead_setauthsize(tfm, TIPC_AES_GCM_TAG_SIZE); + err |= crypto_aead_setkey(tfm, ukey->key, keylen); + if (unlikely(err)) { + crypto_free_aead(tfm); + break; + } + + tfm_entry = kmalloc(sizeof(*tfm_entry), GFP_KERNEL); + if (unlikely(!tfm_entry)) { + crypto_free_aead(tfm); + err = -ENOMEM; + break; + } + INIT_LIST_HEAD(&tfm_entry->list); + tfm_entry->tfm = tfm; + + /* First entry? */ + if (!tfm_cnt) { + head = tfm_entry; + for_each_possible_cpu(cpu) { + *per_cpu_ptr(tmp->tfm_entry, cpu) = head; + } + } else { + list_add_tail(&tfm_entry->list, &head->list); + } + + } while (++tfm_cnt < sysctl_tipc_max_tfms); + + /* Not any TFM is allocated? */ + if (!tfm_cnt) { + free_percpu(tmp->tfm_entry); + kzfree(tmp); + return err; + } + + /* Copy some chars from the user key as a hint */ + memcpy(tmp->hint, ukey->key, TIPC_AEAD_HINT_LEN); + tmp->hint[TIPC_AEAD_HINT_LEN] = '\0'; + + /* Initialize the other data */ + tmp->mode = mode; + tmp->cloned = NULL; + tmp->authsize = TIPC_AES_GCM_TAG_SIZE; + memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE); + atomic_set(&tmp->users, 0); + atomic64_set(&tmp->seqno, 0); + refcount_set(&tmp->refcnt, 1); + + *aead = tmp; + return 0; +} + +/** + * tipc_aead_clone - Clone a TIPC AEAD key + * @dst: dest key for the cloning + * @src: source key to clone from + * + * Make a "copy" of the source AEAD key data to the dest, the TFMs list is + * common for the keys. + * A reference to the source is hold in the "cloned" pointer for the later + * freeing purposes. + * + * Note: this must be done in cluster-key mode only! 
+ * Return: 0 in case of success, otherwise < 0 + */ +static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src) +{ + struct tipc_aead *aead; + int cpu; + + if (!src) + return -ENOKEY; + + if (src->mode != CLUSTER_KEY) + return -EINVAL; + + if (unlikely(*dst)) + return -EEXIST; + + aead = kzalloc(sizeof(*aead), GFP_ATOMIC); + if (unlikely(!aead)) + return -ENOMEM; + + aead->tfm_entry = alloc_percpu_gfp(struct tipc_tfm *, GFP_ATOMIC); + if (unlikely(!aead->tfm_entry)) { + kzfree(aead); + return -ENOMEM; + } + + for_each_possible_cpu(cpu) { + *per_cpu_ptr(aead->tfm_entry, cpu) = + *per_cpu_ptr(src->tfm_entry, cpu); + } + + memcpy(aead->hint, src->hint, sizeof(src->hint)); + aead->mode = src->mode; + aead->salt = src->salt; + aead->authsize = src->authsize; + atomic_set(&aead->users, 0); + atomic64_set(&aead->seqno, 0); + refcount_set(&aead->refcnt, 1); + + WARN_ON(!refcount_inc_not_zero(&src->refcnt)); + aead->cloned = src; + + *dst = aead; + return 0; +} + +/** + * tipc_aead_mem_alloc - Allocate memory for AEAD request operations + * @tfm: cipher handle to be registered with the request + * @crypto_ctx_size: size of crypto context for callback + * @iv: returned pointer to IV data + * @req: returned pointer to AEAD request data + * @sg: returned pointer to SG lists + * @nsg: number of SG lists to be allocated + * + * Allocate memory to store the crypto context data, AEAD request, IV and SG + * lists, the memory layout is as follows: + * crypto_ctx || iv || aead_req || sg[] + * + * Return: the pointer to the memory areas in case of success, otherwise NULL + */ +static void *tipc_aead_mem_alloc(struct crypto_aead *tfm, + unsigned int crypto_ctx_size, + u8 **iv, struct aead_request **req, + struct scatterlist **sg, int nsg) +{ + unsigned int iv_size, req_size; + unsigned int len; + u8 *mem; + + iv_size = crypto_aead_ivsize(tfm); + req_size = sizeof(**req) + crypto_aead_reqsize(tfm); + + len = crypto_ctx_size; + len += iv_size; + len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1); + len = ALIGN(len, crypto_tfm_ctx_alignment()); + len += req_size; + len = ALIGN(len, __alignof__(struct scatterlist)); + len += nsg * sizeof(**sg); + + mem = kmalloc(len, GFP_ATOMIC); + if (!mem) + return NULL; + + *iv = (u8 *)PTR_ALIGN(mem + crypto_ctx_size, + crypto_aead_alignmask(tfm) + 1); + *req = (struct aead_request *)PTR_ALIGN(*iv + iv_size, + crypto_tfm_ctx_alignment()); + *sg = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size, + __alignof__(struct scatterlist)); + + return (void *)mem; +} + +/** + * tipc_aead_encrypt - Encrypt a message + * @aead: TIPC AEAD key for the message encryption + * @skb: the input/output skb + * @b: TIPC bearer where the message will be delivered after the encryption + * @dst: the destination media address + * @__dnode: TIPC dest node if "known" + * + * Return: + * 0 : if the encryption has completed + * -EINPROGRESS/-EBUSY : if a callback will be performed + * < 0 : the encryption has failed + */ +static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *dst, + struct tipc_node *__dnode) +{ + struct crypto_aead *tfm = tipc_aead_tfm_next(aead); + struct tipc_crypto_tx_ctx *tx_ctx; + struct aead_request *req; + struct sk_buff *trailer; + struct scatterlist *sg; + struct tipc_ehdr *ehdr; + int ehsz, len, tailen, nsg, rc; + void *ctx; + u32 salt; + u8 *iv; + + /* Make sure message len at least 4-byte aligned */ + len = ALIGN(skb->len, 4); + tailen = len - skb->len + 
aead->authsize; + + /* Expand skb tail for authentication tag: + * As for simplicity, we'd have made sure skb having enough tailroom + * for authentication tag @skb allocation. Even when skb is nonlinear + * but there is no frag_list, it should be still fine! + * Otherwise, we must cow it to be a writable buffer with the tailroom. + */ +#ifdef TIPC_CRYPTO_DEBUG + SKB_LINEAR_ASSERT(skb); + if (tailen > skb_tailroom(skb)) { + pr_warn("TX: skb tailroom is not enough: %d, requires: %d\n", + skb_tailroom(skb), tailen); + } +#endif + + if (unlikely(!skb_cloned(skb) && tailen <= skb_tailroom(skb))) { + nsg = 1; + trailer = skb; + } else { + /* TODO: We could avoid skb_cow_data() if skb has no frag_list + * e.g. by skb_fill_page_desc() to add another page to the skb + * with the wanted tailen... However, page skbs look not often, + * so take it easy now! + * Cloned skbs e.g. from link_xmit() seems no choice though :( + */ + nsg = skb_cow_data(skb, tailen, &trailer); + if (unlikely(nsg < 0)) { + pr_err("TX: skb_cow_data() returned %d\n", nsg); + return nsg; + } + } + + pskb_put(skb, trailer, tailen); + + /* Allocate memory for the AEAD operation */ + ctx = tipc_aead_mem_alloc(tfm, sizeof(*tx_ctx), &iv, &req, &sg, nsg); + if (unlikely(!ctx)) + return -ENOMEM; + TIPC_SKB_CB(skb)->crypto_ctx = ctx; + + /* Map skb to the sg lists */ + sg_init_table(sg, nsg); + rc = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(rc < 0)) { + pr_err("TX: skb_to_sgvec() returned %d, nsg %d!\n", rc, nsg); + goto exit; + } + + /* Prepare IV: [SALT (4 octets)][SEQNO (8 octets)] + * In case we're in cluster-key mode, SALT is varied by xor-ing with + * the source address (or w0 of id), otherwise with the dest address + * if dest is known. + */ + ehdr = (struct tipc_ehdr *)skb->data; + salt = aead->salt; + if (aead->mode == CLUSTER_KEY) + salt ^= ehdr->addr; /* __be32 */ + else if (__dnode) + salt ^= tipc_node_get_addr(__dnode); + memcpy(iv, &salt, 4); + memcpy(iv + 4, (u8 *)&ehdr->seqno, 8); + + /* Prepare request */ + ehsz = tipc_ehdr_size(ehdr); + aead_request_set_tfm(req, tfm); + aead_request_set_ad(req, ehsz); + aead_request_set_crypt(req, sg, sg, len - ehsz, iv); + + /* Set callback function & data */ + aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tipc_aead_encrypt_done, skb); + tx_ctx = (struct tipc_crypto_tx_ctx *)ctx; + tx_ctx->aead = aead; + tx_ctx->bearer = b; + memcpy(&tx_ctx->dst, dst, sizeof(*dst)); + + /* Hold bearer */ + if (unlikely(!tipc_bearer_hold(b))) { + rc = -ENODEV; + goto exit; + } + + /* Now, do encrypt */ + rc = crypto_aead_encrypt(req); + if (rc == -EINPROGRESS || rc == -EBUSY) + return rc; + + tipc_bearer_put(b); + +exit: + kfree(ctx); + TIPC_SKB_CB(skb)->crypto_ctx = NULL; + return rc; +} + +static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + struct tipc_crypto_tx_ctx *tx_ctx = TIPC_SKB_CB(skb)->crypto_ctx; + struct tipc_bearer *b = tx_ctx->bearer; + struct tipc_aead *aead = tx_ctx->aead; + struct tipc_crypto *tx = aead->crypto; + struct net *net = tx->net; + + switch (err) { + case 0: + this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]); + if (likely(test_bit(0, &b->up))) + b->media->send_msg(net, skb, b, &tx_ctx->dst); + else + kfree_skb(skb); + break; + case -EINPROGRESS: + return; + default: + this_cpu_inc(tx->stats->stat[STAT_ASYNC_NOK]); + kfree_skb(skb); + break; + } + + kfree(tx_ctx); + tipc_bearer_put(b); + tipc_aead_put(aead); +} + +/** + * tipc_aead_decrypt - Decrypt an encrypted message + * @net: struct 
net + * @aead: TIPC AEAD for the message decryption + * @skb: the input/output skb + * @b: TIPC bearer where the message has been received + * + * Return: + * 0 : if the decryption has completed + * -EINPROGRESS/-EBUSY : if a callback will be performed + * < 0 : the decryption has failed + */ +static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, + struct sk_buff *skb, struct tipc_bearer *b) +{ + struct tipc_crypto_rx_ctx *rx_ctx; + struct aead_request *req; + struct crypto_aead *tfm; + struct sk_buff *unused; + struct scatterlist *sg; + struct tipc_ehdr *ehdr; + int ehsz, nsg, rc; + void *ctx; + u32 salt; + u8 *iv; + + if (unlikely(!aead)) + return -ENOKEY; + + /* Cow skb data if needed */ + if (likely(!skb_cloned(skb) && + (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) { + nsg = 1 + skb_shinfo(skb)->nr_frags; + } else { + nsg = skb_cow_data(skb, 0, &unused); + if (unlikely(nsg < 0)) { + pr_err("RX: skb_cow_data() returned %d\n", nsg); + return nsg; + } + } + + /* Allocate memory for the AEAD operation */ + tfm = tipc_aead_tfm_next(aead); + ctx = tipc_aead_mem_alloc(tfm, sizeof(*rx_ctx), &iv, &req, &sg, nsg); + if (unlikely(!ctx)) + return -ENOMEM; + TIPC_SKB_CB(skb)->crypto_ctx = ctx; + + /* Map skb to the sg lists */ + sg_init_table(sg, nsg); + rc = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(rc < 0)) { + pr_err("RX: skb_to_sgvec() returned %d, nsg %d\n", rc, nsg); + goto exit; + } + + /* Reconstruct IV: */ + ehdr = (struct tipc_ehdr *)skb->data; + salt = aead->salt; + if (aead->mode == CLUSTER_KEY) + salt ^= ehdr->addr; /* __be32 */ + else if (ehdr->destined) + salt ^= tipc_own_addr(net); + memcpy(iv, &salt, 4); + memcpy(iv + 4, (u8 *)&ehdr->seqno, 8); + + /* Prepare request */ + ehsz = tipc_ehdr_size(ehdr); + aead_request_set_tfm(req, tfm); + aead_request_set_ad(req, ehsz); + aead_request_set_crypt(req, sg, sg, skb->len - ehsz, iv); + + /* Set callback function & data */ + aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tipc_aead_decrypt_done, skb); + rx_ctx = (struct tipc_crypto_rx_ctx *)ctx; + rx_ctx->aead = aead; + rx_ctx->bearer = b; + + /* Hold bearer */ + if (unlikely(!tipc_bearer_hold(b))) { + rc = -ENODEV; + goto exit; + } + + /* Now, do decrypt */ + rc = crypto_aead_decrypt(req); + if (rc == -EINPROGRESS || rc == -EBUSY) + return rc; + + tipc_bearer_put(b); + +exit: + kfree(ctx); + TIPC_SKB_CB(skb)->crypto_ctx = NULL; + return rc; +} + +static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + struct tipc_crypto_rx_ctx *rx_ctx = TIPC_SKB_CB(skb)->crypto_ctx; + struct tipc_bearer *b = rx_ctx->bearer; + struct tipc_aead *aead = rx_ctx->aead; + struct tipc_crypto_stats __percpu *stats = aead->crypto->stats; + struct net *net = aead->crypto->net; + + switch (err) { + case 0: + this_cpu_inc(stats->stat[STAT_ASYNC_OK]); + break; + case -EINPROGRESS: + return; + default: + this_cpu_inc(stats->stat[STAT_ASYNC_NOK]); + break; + } + + kfree(rx_ctx); + tipc_crypto_rcv_complete(net, aead, b, &skb, err); + if (likely(skb)) { + if (likely(test_bit(0, &b->up))) + tipc_rcv(net, skb, b); + else + kfree_skb(skb); + } + + tipc_bearer_put(b); +} + +static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr) +{ + return (ehdr->user != LINK_CONFIG) ? 
EHDR_SIZE : EHDR_CFG_SIZE;
+}
+
+/**
+ * tipc_ehdr_validate - Validate an encryption message
+ * @skb: the message buffer
+ *
+ * Return: "true" if this is a valid encryption message, otherwise "false"
+ */
+bool tipc_ehdr_validate(struct sk_buff *skb)
+{
+	struct tipc_ehdr *ehdr;
+	int ehsz;
+
+	if (unlikely(!pskb_may_pull(skb, EHDR_MIN_SIZE)))
+		return false;
+
+	ehdr = (struct tipc_ehdr *)skb->data;
+	if (unlikely(ehdr->version != TIPC_EVERSION))
+		return false;
+	ehsz = tipc_ehdr_size(ehdr);
+	if (unlikely(!pskb_may_pull(skb, ehsz)))
+		return false;
+	if (unlikely(skb->len <= ehsz + TIPC_AES_GCM_TAG_SIZE))
+		return false;
+	if (unlikely(!ehdr->tx_key))
+		return false;
+
+	return true;
+}
+
+/**
+ * tipc_ehdr_build - Build TIPC encryption message header
+ * @net: struct net
+ * @aead: TX AEAD key to be used for the message encryption
+ * @tx_key: key id used for the message encryption
+ * @skb: input/output message skb
+ * @__rx: RX crypto handle if dest is "known"
+ *
+ * Return: the header size if the building is successful, otherwise < 0
+ */
+static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead,
+			   u8 tx_key, struct sk_buff *skb,
+			   struct tipc_crypto *__rx)
+{
+	struct tipc_msg *hdr = buf_msg(skb);
+	struct tipc_ehdr *ehdr;
+	u32 user = msg_user(hdr);
+	u64 seqno;
+	int ehsz;
+
+	/* Make room for encryption header */
+	ehsz = (user != LINK_CONFIG) ? EHDR_SIZE : EHDR_CFG_SIZE;
+	WARN_ON(skb_headroom(skb) < ehsz);
+	ehdr = (struct tipc_ehdr *)skb_push(skb, ehsz);
+
+	/* Obtain a seqno first:
+	 * Use the key seqno (= cluster-wise) if dest is unknown or we're in
+	 * cluster key mode, otherwise a per-peer seqno is better!
+	 */
+	if (!__rx || aead->mode == CLUSTER_KEY)
+		seqno = atomic64_inc_return(&aead->seqno);
+	else
+		seqno = atomic64_inc_return(&__rx->sndnxt);
+
+	/* Revoke the key if seqno is wrapped around */
+	if (unlikely(!seqno))
+		return tipc_crypto_key_revoke(net, tx_key);
+
+	/* Word 1-2 */
+	ehdr->seqno = cpu_to_be64(seqno);
+
+	/* Words 0, 3- */
+	ehdr->version = TIPC_EVERSION;
+	ehdr->user = 0;
+	ehdr->keepalive = 0;
+	ehdr->tx_key = tx_key;
+	ehdr->destined = (__rx) ? 1 : 0;
+	ehdr->rx_key_active = (__rx) ? __rx->key.active : 0;
+	ehdr->reserved_1 = 0;
+	ehdr->reserved_2 = 0;
+
+	switch (user) {
+	case LINK_CONFIG:
+		ehdr->user = LINK_CONFIG;
+		memcpy(ehdr->id, tipc_own_id(net), NODE_ID_LEN);
+		break;
+	default:
+		if (user == LINK_PROTOCOL && msg_type(hdr) == STATE_MSG) {
+			ehdr->user = LINK_PROTOCOL;
+			ehdr->keepalive = msg_is_keepalive(hdr);
+		}
+		ehdr->addr = hdr->hdr[3];
+		break;
+	}
+
+	return ehsz;
+}
+
+static inline void tipc_crypto_key_set_state(struct tipc_crypto *c,
+					     u8 new_passive,
+					     u8 new_active,
+					     u8 new_pending)
+{
+#ifdef TIPC_CRYPTO_DEBUG
+	struct tipc_key old = c->key;
+	char buf[32];
+#endif
+
+	c->key.keys = ((new_passive & KEY_MASK) << (KEY_BITS * 2)) |
+		      ((new_active & KEY_MASK) << (KEY_BITS)) |
+		      ((new_pending & KEY_MASK));
+
+#ifdef TIPC_CRYPTO_DEBUG
+	pr_info("%s(%s): key changing %s ::%pS\n",
+		(c->node) ? "RX" : "TX",
+		(c->node) ? tipc_node_get_id_str(c->node) :
+			    tipc_own_id_string(c->net),
+		tipc_key_change_dump(old, c->key, buf),
+		__builtin_return_address(0));
+#endif
+}
+
+/**
+ * tipc_crypto_key_init - Initiate a new user / AEAD key
+ * @c: TIPC crypto to which new key is attached
+ * @ukey: the user key
+ * @mode: the key mode (CLUSTER_KEY or PER_NODE_KEY)
+ *
+ * A new TIPC AEAD key will be allocated and initiated with the specified user
+ * key, then attached to the TIPC crypto.
+ *
+ * Return: new key id in case of success, otherwise: < 0
+ */
+int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey,
+			 u8 mode)
+{
+	struct tipc_aead *aead = NULL;
+	int rc = 0;
+
+	/* Initiate with the new user key */
+	rc = tipc_aead_init(&aead, ukey, mode);
+
+	/* Attach it to the crypto */
+	if (likely(!rc)) {
+		rc = tipc_crypto_key_attach(c, aead, 0);
+		if (rc < 0)
+			tipc_aead_free(&aead->rcu);
+	}
+
+	pr_info("%s(%s): key initiating, rc %d!\n",
+		(c->node) ? "RX" : "TX",
+		(c->node) ? tipc_node_get_id_str(c->node) :
+			    tipc_own_id_string(c->net),
+		rc);
+
+	return rc;
+}
+
+/**
+ * tipc_crypto_key_attach - Attach a new AEAD key to TIPC crypto
+ * @c: TIPC crypto to which the new AEAD key is attached
+ * @aead: the new AEAD key pointer
+ * @pos: desired slot in the crypto key array, = 0 if any!
+ *
+ * Return: new key id in case of success, otherwise: -EBUSY
+ */
+static int tipc_crypto_key_attach(struct tipc_crypto *c,
+				  struct tipc_aead *aead, u8 pos)
+{
+	u8 new_pending, new_passive, new_key;
+	struct tipc_key key;
+	int rc = -EBUSY;
+
+	spin_lock_bh(&c->lock);
+	key = c->key;
+	if (key.active && key.passive)
+		goto exit;
+	if (key.passive && !tipc_aead_users(c->aead[key.passive]))
+		goto exit;
+	if (key.pending) {
+		if (pos)
+			goto exit;
+		if (tipc_aead_users(c->aead[key.pending]) > 0)
+			goto exit;
+		/* Replace it */
+		new_pending = key.pending;
+		new_passive = key.passive;
+		new_key = new_pending;
+	} else {
+		if (pos) {
+			if (key.active && pos != key_next(key.active)) {
+				new_pending = key.pending;
+				new_passive = pos;
+				new_key = new_passive;
+				goto attach;
+			} else if (!key.active && !key.passive) {
+				new_pending = pos;
+				new_passive = key.passive;
+				new_key = new_pending;
+				goto attach;
+			}
+		}
+		new_pending = key_next(key.active ?: key.passive);
+		new_passive = key.passive;
+		new_key = new_pending;
+	}
+
+attach:
+	aead->crypto = c;
+	tipc_crypto_key_set_state(c, new_passive, key.active, new_pending);
+	tipc_aead_rcu_replace(c->aead[new_key], aead, &c->lock);
+
+	c->working = 1;
+	c->timer1 = jiffies;
+	c->timer2 = jiffies;
+	rc = new_key;
+
+exit:
+	spin_unlock_bh(&c->lock);
+	return rc;
+}
+
+void tipc_crypto_key_flush(struct tipc_crypto *c)
+{
+	int k;
+
+	spin_lock_bh(&c->lock);
+	c->working = 0;
+	tipc_crypto_key_set_state(c, 0, 0, 0);
+	for (k = KEY_MIN; k <= KEY_MAX; k++)
+		tipc_crypto_key_detach(c->aead[k], &c->lock);
+	atomic_set(&c->peer_rx_active, 0);
+	atomic64_set(&c->sndnxt, 0);
+	spin_unlock_bh(&c->lock);
+}
+
+/**
+ * tipc_crypto_key_try_align - Align RX keys if possible
+ * @rx: RX crypto handle
+ * @new_pending: new pending slot if aligned (= TX key from peer)
+ *
+ * Peer has used an unknown key slot; this only happens when the peer has
+ * left and rejoined, or we are a newcomer.
+ * That means there must be no active key but a pending key at an unaligned
+ * slot. If so, we try to move the pending key to the new slot.
+ * Note: A potential passive key can exist, it will be shifted
+ * correspondingly!
+ *
+ * Return: "true" if the key is successfully aligned, otherwise "false"
+ */
+static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending)
+{
+	struct tipc_aead *tmp1, *tmp2 = NULL;
+	struct tipc_key key;
+	bool aligned = false;
+	u8 new_passive = 0;
+	int x;
+
+	spin_lock(&rx->lock);
+	key = rx->key;
+	if (key.pending == new_pending) {
+		aligned = true;
+		goto exit;
+	}
+	if (key.active)
+		goto exit;
+	if (!key.pending)
+		goto exit;
+	if (tipc_aead_users(rx->aead[key.pending]) > 0)
+		goto exit;
+
+	/* Try to "isolate" this pending key first */
+	tmp1 = tipc_aead_rcu_ptr(rx->aead[key.pending], &rx->lock);
+	if (!refcount_dec_if_one(&tmp1->refcnt))
+		goto exit;
+	rcu_assign_pointer(rx->aead[key.pending], NULL);
+
+	/* Move passive key if any */
+	if (key.passive) {
+		tipc_aead_rcu_swap(rx->aead[key.passive], tmp2, &rx->lock);
+		x = (key.passive - key.pending + new_pending) % KEY_MAX;
+		new_passive = (x <= 0) ? x + KEY_MAX : x;
+	}
+
+	/* Re-allocate the key(s) */
+	tipc_crypto_key_set_state(rx, new_passive, 0, new_pending);
+	rcu_assign_pointer(rx->aead[new_pending], tmp1);
+	if (new_passive)
+		rcu_assign_pointer(rx->aead[new_passive], tmp2);
+	refcount_set(&tmp1->refcnt, 1);
+	aligned = true;
+	pr_info("RX(%s): key is aligned!\n", tipc_node_get_id_str(rx->node));
+
+exit:
+	spin_unlock(&rx->lock);
+	return aligned;
+}
+
+/**
+ * tipc_crypto_key_pick_tx - Pick one TX key for message decryption
+ * @tx: TX crypto handle
+ * @rx: RX crypto handle (can be NULL)
+ * @skb: the message skb which will be decrypted later
+ *
+ * This function looks up the existing TX keys and picks one that is suitable
+ * for the message decryption; it must be a cluster key and not used before
+ * on the same message (i.e. recursive).
+ *
+ * Return: the TX AEAD key handle in case of success, otherwise NULL
+ */
+static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx,
+						 struct tipc_crypto *rx,
+						 struct sk_buff *skb)
+{
+	struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(skb);
+	struct tipc_aead *aead = NULL;
+	struct tipc_key key = tx->key;
+	u8 k, i = 0;
+
+	/* Initialize data if not yet */
+	if (!skb_cb->tx_clone_deferred) {
+		skb_cb->tx_clone_deferred = 1;
+		memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx));
+	}
+
+	skb_cb->tx_clone_ctx.rx = rx;
+	if (++skb_cb->tx_clone_ctx.recurs > 2)
+		return NULL;
+
+	/* Pick one TX key */
+	spin_lock(&tx->lock);
+	do {
+		k = (i == 0) ? key.pending :
+			((i == 1) ? key.active : key.passive);
+		if (!k)
+			continue;
+		aead = tipc_aead_rcu_ptr(tx->aead[k], &tx->lock);
+		if (!aead)
+			continue;
+		if (aead->mode != CLUSTER_KEY ||
+		    aead == skb_cb->tx_clone_ctx.last) {
+			aead = NULL;
+			continue;
+		}
+		/* Ok, found one cluster key */
+		skb_cb->tx_clone_ctx.last = aead;
+		WARN_ON(skb->next);
+		skb->next = skb_clone(skb, GFP_ATOMIC);
+		if (unlikely(!skb->next))
+			pr_warn("Failed to clone skb for next round if any\n");
+		WARN_ON(!refcount_inc_not_zero(&aead->refcnt));
+		break;
+	} while (++i < 3);
+	spin_unlock(&tx->lock);
+
+	return aead;
+}
+
+/**
+ * tipc_crypto_key_synch - Synch own key data according to peer key status
+ * @rx: RX crypto handle
+ * @new_rx_active: latest RX active key from peer
+ * @hdr: TIPCv2 message
+ *
+ * This function updates the peer node related data as the peer RX active key
+ * has changed, so the number of TX keys' users on this node is increased and
+ * decreased correspondingly.
+ *
+ * The "per-peer" sndnxt is also reset when the peer key has switched.
+ */
+static void tipc_crypto_key_synch(struct tipc_crypto *rx, u8 new_rx_active,
+				  struct tipc_msg *hdr)
+{
+	struct net *net = rx->net;
+	struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+	u8 cur_rx_active;
+
+	/* TX might not even be ready yet */
+	if (unlikely(!tx->key.active && !tx->key.pending))
+		return;
+
+	cur_rx_active = atomic_read(&rx->peer_rx_active);
+	if (likely(cur_rx_active == new_rx_active))
+		return;
+
+	/* Make sure this message is destined for this node */
+	if (unlikely(msg_short(hdr) ||
+		     msg_destnode(hdr) != tipc_own_addr(net)))
+		return;
+
+	/* Peer RX active key has changed, try to update own & TX key users */
+	if (atomic_cmpxchg(&rx->peer_rx_active,
+			   cur_rx_active,
+			   new_rx_active) == cur_rx_active) {
+		if (new_rx_active)
+			tipc_aead_users_inc(tx->aead[new_rx_active], INT_MAX);
+		if (cur_rx_active)
+			tipc_aead_users_dec(tx->aead[cur_rx_active], 0);
+
+		atomic64_set(&rx->sndnxt, 0);
+		/* Mark the point TX key users changed */
+		tx->timer1 = jiffies;
+
+#ifdef TIPC_CRYPTO_DEBUG
+		pr_info("TX(%s): key users changed %d-- %d++, peer RX(%s)\n",
+			tipc_own_id_string(net), cur_rx_active,
+			new_rx_active, tipc_node_get_id_str(rx->node));
+#endif
+	}
+}
+
+static int tipc_crypto_key_revoke(struct net *net, u8 tx_key)
+{
+	struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+	struct tipc_key key;
+
+	spin_lock(&tx->lock);
+	key = tx->key;
+	WARN_ON(!key.active || tx_key != key.active);
+
+	/* Free the active key */
+	tipc_crypto_key_set_state(tx, key.passive, 0, key.pending);
+	tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
+	spin_unlock(&tx->lock);
+
+	pr_warn("TX(%s): key is revoked!\n", tipc_own_id_string(net));
+	return -EKEYREVOKED;
+}
+
+int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net,
+		      struct tipc_node *node)
+{
+	struct tipc_crypto *c;
+
+	if (*crypto)
+		return -EEXIST;
+
+	/* Allocate crypto */
+	c = kzalloc(sizeof(*c), GFP_ATOMIC);
+	if (!c)
+		return -ENOMEM;
+
+	/* Allocate statistic structure */
+	c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC);
+	if (!c->stats) {
+		kzfree(c);
+		return -ENOMEM;
+	}
+
+	c->working = 0;
+	c->net = net;
+	c->node = node;
+	tipc_crypto_key_set_state(c, 0, 0, 0);
+	atomic_set(&c->peer_rx_active, 0);
+	atomic64_set(&c->sndnxt, 0);
+	c->timer1 = jiffies;
+	c->timer2 = jiffies;
+	spin_lock_init(&c->lock);
+	*crypto = c;
+
+	return 0;
+}
+
+void tipc_crypto_stop(struct tipc_crypto **crypto)
+{
+	struct tipc_crypto *c, *tx, *rx;
+	bool is_rx;
+	u8 k;
+
+	if (!*crypto)
+		return;
+
+	rcu_read_lock();
+	/* RX stopping? => decrease TX key users if any */
+	is_rx = !!((*crypto)->node);
+	if (is_rx) {
+		rx = *crypto;
+		tx = tipc_net(rx->net)->crypto_tx;
+		k = atomic_read(&rx->peer_rx_active);
+		if (k) {
+			tipc_aead_users_dec(tx->aead[k], 0);
+			/* Mark the point TX key users changed */
+			tx->timer1 = jiffies;
+		}
+	}
+
+	/* Release AEAD keys */
+	c = *crypto;
+	for (k = KEY_MIN; k <= KEY_MAX; k++)
+		tipc_aead_put(rcu_dereference(c->aead[k]));
+	rcu_read_unlock();
+
+	pr_warn("%s(%s) has been purged, node left!\n",
+		(is_rx) ? "RX" : "TX",
+		(is_rx) ? tipc_node_get_id_str((*crypto)->node) :
+			  tipc_own_id_string((*crypto)->net));
+
+	/* Free this crypto's statistics */
+	free_percpu(c->stats);
+
+	*crypto = NULL;
+	kzfree(c);
+}
+
+void tipc_crypto_timeout(struct tipc_crypto *rx)
+{
+	struct tipc_net *tn = tipc_net(rx->net);
+	struct tipc_crypto *tx = tn->crypto_tx;
+	struct tipc_key key;
+	u8 new_pending, new_passive;
+	int cmd;
+
+	/* TX key activating:
+	 * The pending key (users > 0) -> active
+	 * The active key if any (users == 0) -> free
+	 */
+	spin_lock(&tx->lock);
+	key = tx->key;
+	if (key.active && tipc_aead_users(tx->aead[key.active]) > 0)
+		goto s1;
+	if (!key.pending || tipc_aead_users(tx->aead[key.pending]) <= 0)
+		goto s1;
+	if (time_before(jiffies, tx->timer1 + TIPC_TX_LASTING_LIM))
+		goto s1;
+
+	tipc_crypto_key_set_state(tx, key.passive, key.pending, 0);
+	if (key.active)
+		tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
+	this_cpu_inc(tx->stats->stat[STAT_SWITCHES]);
+	pr_info("TX(%s): key %d is activated!\n", tipc_own_id_string(tx->net),
+		key.pending);
+
+s1:
+	spin_unlock(&tx->lock);
+
+	/* RX key activating:
+	 * The pending key (users > 0) -> active
+	 * The active key if any -> passive, freed later
+	 */
+	spin_lock(&rx->lock);
+	key = rx->key;
+	if (!key.pending || tipc_aead_users(rx->aead[key.pending]) <= 0)
+		goto s2;
+
+	new_pending = (key.passive &&
+		       !tipc_aead_users(rx->aead[key.passive])) ?
+				       key.passive : 0;
+	new_passive = (key.active) ?: ((new_pending) ? 0 : key.passive);
+	tipc_crypto_key_set_state(rx, new_passive, key.pending, new_pending);
+	this_cpu_inc(rx->stats->stat[STAT_SWITCHES]);
+	pr_info("RX(%s): key %d is activated!\n",
+		tipc_node_get_id_str(rx->node), key.pending);
+	goto s5;
+
+s2:
+	/* RX key "faulty" switching:
+	 * The faulty pending key (users < -30) -> passive
+	 * The passive key (users = 0) -> pending
+	 * Note: This only happens after RX was deactivated - s3!
+	 */
+	key = rx->key;
+	if (!key.pending || tipc_aead_users(rx->aead[key.pending]) > -30)
+		goto s3;
+	if (!key.passive || tipc_aead_users(rx->aead[key.passive]) != 0)
+		goto s3;
+
+	new_pending = key.passive;
+	new_passive = key.pending;
+	tipc_crypto_key_set_state(rx, new_passive, key.active, new_pending);
+	goto s5;
+
+s3:
+	/* RX key deactivating:
+	 * The passive key if any -> pending
+	 * The active key -> passive (users = 0) / pending
+	 * The pending key if any -> passive (users = 0)
+	 */
+	key = rx->key;
+	if (!key.active)
+		goto s4;
+	if (time_before(jiffies, rx->timer1 + TIPC_RX_ACTIVE_LIM))
+		goto s4;
+
+	new_pending = (key.passive) ?: key.active;
+	new_passive = (key.passive) ? key.active : key.pending;
+	tipc_aead_users_set(rx->aead[new_pending], 0);
+	if (new_passive)
+		tipc_aead_users_set(rx->aead[new_passive], 0);
+	tipc_crypto_key_set_state(rx, new_passive, 0, new_pending);
+	pr_info("RX(%s): key %d is deactivated!\n",
+		tipc_node_get_id_str(rx->node), key.active);
+	goto s5;
+
+s4:
+	/* RX key passive -> freed: */
+	key = rx->key;
+	if (!key.passive || !tipc_aead_users(rx->aead[key.passive]))
+		goto s5;
+	if (time_before(jiffies, rx->timer2 + TIPC_RX_PASSIVE_LIM))
+		goto s5;
+
+	tipc_crypto_key_set_state(rx, 0, key.active, key.pending);
+	tipc_crypto_key_detach(rx->aead[key.passive], &rx->lock);
+	pr_info("RX(%s): key %d is freed!\n", tipc_node_get_id_str(rx->node),
+		key.passive);
+
+s5:
+	spin_unlock(&rx->lock);
+
+	/* Limit max_tfms & do debug commands if needed */
+	if (likely(sysctl_tipc_max_tfms <= TIPC_MAX_TFMS_LIM))
+		return;
+
+	cmd = sysctl_tipc_max_tfms;
+	sysctl_tipc_max_tfms = TIPC_MAX_TFMS_DEF;
+	tipc_crypto_do_cmd(rx->net, cmd);
+}
+
+/**
+ * tipc_crypto_xmit - Build & encrypt TIPC message for xmit
+ * @net: struct net
+ * @skb: input/output message skb pointer
+ * @b: bearer used for xmit later
+ * @dst: destination media address
+ * @__dnode: destination node for reference if any
+ *
+ * First, build an encryption message header on the top of the message, then
+ * encrypt the original TIPC message by using the active or pending TX key.
+ * If the encryption is successful, the encrypted skb is returned directly or
+ * via the callback.
+ * Otherwise, the skb is freed!
+ *
+ * Return:
+ * 0                   : the encryption has succeeded (or no encryption)
+ * -EINPROGRESS/-EBUSY : the encryption is ongoing, a callback will be made
+ * -ENOKEY             : the encryption has failed due to no key
+ * -EKEYREVOKED        : the encryption has failed due to key revoked
+ * -ENOMEM             : the encryption has failed due to no memory
+ * < 0                 : the encryption has failed due to other reasons
+ */
+int tipc_crypto_xmit(struct net *net, struct sk_buff **skb,
+		     struct tipc_bearer *b, struct tipc_media_addr *dst,
+		     struct tipc_node *__dnode)
+{
+	struct tipc_crypto *__rx = tipc_node_crypto_rx(__dnode);
+	struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+	struct tipc_crypto_stats __percpu *stats = tx->stats;
+	struct tipc_key key = tx->key;
+	struct tipc_aead *aead = NULL;
+	struct sk_buff *probe;
+	int rc = -ENOKEY;
+	u8 tx_key;
+
+	/* No encryption? */
+	if (!tx->working)
+		return 0;
+
+	/* Try with the pending key if available and:
+	 * 1) This is the only choice (i.e. no active key) or;
+	 * 2) Peer has switched to this key (unicast only) or;
+	 * 3) It is time to do a pending key probe;
+	 */
+	if (unlikely(key.pending)) {
+		tx_key = key.pending;
+		if (!key.active)
+			goto encrypt;
+		if (__rx && atomic_read(&__rx->peer_rx_active) == tx_key)
+			goto encrypt;
+		if (TIPC_SKB_CB(*skb)->probe)
+			goto encrypt;
+		if (!__rx &&
+		    time_after(jiffies, tx->timer2 + TIPC_TX_PROBE_LIM)) {
+			tx->timer2 = jiffies;
+			probe = skb_clone(*skb, GFP_ATOMIC);
+			if (probe) {
+				TIPC_SKB_CB(probe)->probe = 1;
+				tipc_crypto_xmit(net, &probe, b, dst, __dnode);
+				if (probe)
+					b->media->send_msg(net, probe, b, dst);
+			}
+		}
+	}
+	/* Else, use the active key if any */
+	if (likely(key.active)) {
+		tx_key = key.active;
+		goto encrypt;
+	}
+	goto exit;
+
+encrypt:
+	aead = tipc_aead_get(tx->aead[tx_key]);
+	if (unlikely(!aead))
+		goto exit;
+	rc = tipc_ehdr_build(net, aead, tx_key, *skb, __rx);
+	if (likely(rc > 0))
+		rc = tipc_aead_encrypt(aead, *skb, b, dst, __dnode);
+
+exit:
+	switch (rc) {
+	case 0:
+		this_cpu_inc(stats->stat[STAT_OK]);
+		break;
+	case -EINPROGRESS:
+	case -EBUSY:
+		this_cpu_inc(stats->stat[STAT_ASYNC]);
+		*skb = NULL;
+		return rc;
+	default:
+		this_cpu_inc(stats->stat[STAT_NOK]);
+		if (rc == -ENOKEY)
+			this_cpu_inc(stats->stat[STAT_NOKEYS]);
+		else if (rc == -EKEYREVOKED)
+			this_cpu_inc(stats->stat[STAT_BADKEYS]);
+		kfree_skb(*skb);
+		*skb = NULL;
+		break;
+	}
+
+	tipc_aead_put(aead);
+	return rc;
+}
+
+/**
+ * tipc_crypto_rcv - Decrypt an encrypted TIPC message from peer
+ * @net: struct net
+ * @rx: RX crypto handle
+ * @skb: input/output message skb pointer
+ * @b: bearer where the message has been received
+ *
+ * If the decryption is successful, the decrypted skb is returned directly or
+ * via the callback; the encryption header and auth tag will be trimmed off
+ * before forwarding to tipc_rcv() via the tipc_crypto_rcv_complete().
+ * Otherwise, the skb will be freed!
+ * Note: RX key(s) can be re-aligned, or in case no key is suitable, TX
+ * cluster key(s) can be taken for decryption (- recursive).
+ *
+ * Return:
+ * 0                   : the decryption has successfully completed
+ * -EINPROGRESS/-EBUSY : the decryption is ongoing, a callback will be made
+ * -ENOKEY             : the decryption has failed due to no key
+ * -EBADMSG            : the decryption has failed due to bad message
+ * -ENOMEM             : the decryption has failed due to no memory
+ * < 0                 : the decryption has failed due to other reasons
+ */
+int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx,
+		    struct sk_buff **skb, struct tipc_bearer *b)
+{
+	struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
+	struct tipc_crypto_stats __percpu *stats;
+	struct tipc_aead *aead = NULL;
+	struct tipc_key key;
+	int rc = -ENOKEY;
+	u8 tx_key = 0;
+
+	/* New peer?
+	 * Let's try with TX key (i.e. cluster mode) & verify the skb first!
+	 */
+	if (unlikely(!rx))
+		goto pick_tx;
+
+	/* Pick RX key according to TX key, three cases are possible:
+	 * 1) The current active key (likely) or;
+	 * 2) The pending (new or deactivated) key (if any) or;
+	 * 3) The passive or old active key (i.e. users > 0);
+	 */
+	tx_key = ((struct tipc_ehdr *)(*skb)->data)->tx_key;
+	key = rx->key;
+	if (likely(tx_key == key.active))
+		goto decrypt;
+	if (tx_key == key.pending)
+		goto decrypt;
+	if (tx_key == key.passive) {
+		rx->timer2 = jiffies;
+		if (tipc_aead_users(rx->aead[key.passive]) > 0)
+			goto decrypt;
+	}
+
+	/* Unknown key, let's try to align RX key(s) */
+	if (tipc_crypto_key_try_align(rx, tx_key))
+		goto decrypt;
+
+pick_tx:
+	/* No key suitable? Try to pick one from TX... */
+	aead = tipc_crypto_key_pick_tx(tx, rx, *skb);
+	if (aead)
+		goto decrypt;
+	goto exit;
+
+decrypt:
+	rcu_read_lock();
+	if (!aead)
+		aead = tipc_aead_get(rx->aead[tx_key]);
+	rc = tipc_aead_decrypt(net, aead, *skb, b);
+	rcu_read_unlock();
+
+exit:
+	stats = ((rx) ?: tx)->stats;
+	switch (rc) {
+	case 0:
+		this_cpu_inc(stats->stat[STAT_OK]);
+		break;
+	case -EINPROGRESS:
+	case -EBUSY:
+		this_cpu_inc(stats->stat[STAT_ASYNC]);
+		*skb = NULL;
+		return rc;
+	default:
+		this_cpu_inc(stats->stat[STAT_NOK]);
+		if (rc == -ENOKEY) {
+			kfree_skb(*skb);
+			*skb = NULL;
+			if (rx)
+				tipc_node_put(rx->node);
+			this_cpu_inc(stats->stat[STAT_NOKEYS]);
+			return rc;
+		} else if (rc == -EBADMSG) {
+			this_cpu_inc(stats->stat[STAT_BADMSGS]);
+		}
+		break;
+	}
+
+	tipc_crypto_rcv_complete(net, aead, b, skb, rc);
+	return rc;
+}
+
+static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead,
+				     struct tipc_bearer *b,
+				     struct sk_buff **skb, int err)
+{
+	struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(*skb);
+	struct tipc_crypto *rx = aead->crypto;
+	struct tipc_aead *tmp = NULL;
+	struct tipc_ehdr *ehdr;
+	struct tipc_node *n;
+	u8 rx_key_active;
+	bool destined;
+
+	/* Is this completed by TX? */
+	if (unlikely(!rx->node)) {
+		rx = skb_cb->tx_clone_ctx.rx;
+#ifdef TIPC... [truncated message content]
|
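The key handling above packs all three key slots (passive, active, pending) into a single byte, as seen in tipc_crypto_key_set_state(). To make that slot arithmetic concrete, here is a minimal user-space sketch of the same packing; KEY_BITS = 2 and KEY_MASK = 0x3 are assumptions inferred from the two-bit slot ids (1..3), and the struct and helper names here are hypothetical stand-ins, not the patch's code:

	#include <stdint.h>
	#include <stdio.h>

	#define KEY_BITS 2			/* assumption: 2 bits per slot id */
	#define KEY_MASK ((1 << KEY_BITS) - 1)

	struct key_state {			/* stand-in for the 'keys' byte */
		uint8_t keys;			/* passive | active | pending */
	};

	static void key_set_state(struct key_state *k, uint8_t passive,
				  uint8_t active, uint8_t pending)
	{
		/* Same bit layout as tipc_crypto_key_set_state() above */
		k->keys = ((passive & KEY_MASK) << (KEY_BITS * 2)) |
			  ((active & KEY_MASK) << KEY_BITS) |
			  (pending & KEY_MASK);
	}

	int main(void)
	{
		struct key_state k;

		key_set_state(&k, 3, 1, 2);
		printf("passive=%u active=%u pending=%u\n",
		       (k.keys >> (KEY_BITS * 2)) & KEY_MASK,
		       (k.keys >> KEY_BITS) & KEY_MASK,
		       k.keys & KEY_MASK);
		return 0;
	}

Reading the slots back with the same shifts and mask shows why the state transitions in tipc_crypto_timeout() can be done with a single assignment of 'keys' under the lock.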
From: Tuong L. <tuo...@de...> - 2019-11-08 05:05:34
|
The new structure 'tipc_aead_key' is added to 'tipc.h' so that users are
able to transfer a key to TIPC in the kernel. Netlink will be used for this
purpose in the later commits.

Acked-by: Ying Xue <yin...@wi...>
Acked-by: Jon Maloy <jon...@er...>
Signed-off-by: Tuong Lien <tuo...@de...>
---
 include/uapi/linux/tipc.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 76421b878767..add01db1daef 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -233,6 +233,27 @@ struct tipc_sioc_nodeid_req {
 	char node_id[TIPC_NODEID_LEN];
 };
 
+/*
+ * TIPC Crypto, AEAD
+ */
+#define TIPC_AEAD_ALG_NAME	(32)
+
+struct tipc_aead_key {
+	char alg_name[TIPC_AEAD_ALG_NAME];
+	unsigned int keylen;	/* in bytes */
+	char key[];
+};
+
+#define TIPC_AEAD_KEYLEN_MIN	(16 + 4)
+#define TIPC_AEAD_KEYLEN_MAX	(32 + 4)
+#define TIPC_AEAD_KEY_SIZE_MAX	(sizeof(struct tipc_aead_key) + \
+				 TIPC_AEAD_KEYLEN_MAX)
+
+static inline int tipc_aead_key_size(struct tipc_aead_key *key)
+{
+	return sizeof(*key) + key->keylen;
+}
+
 /* The macros and functions below are deprecated: */
-- 
2.13.7

|
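As a hedged illustration of how user space might populate this structure before handing it to the kernel (the netlink plumbing only arrives in a later patch of this series), consider the sketch below. The struct and macros come from the patch above; the helper name, the placeholder key bytes, and the allocation strategy are assumptions for illustration only:

	#include <linux/tipc.h>
	#include <stdlib.h>
	#include <string.h>

	/* Sketch: build a 'tipc_aead_key' for gcm(aes) with a 128-bit AES key
	 * plus the 4-byte salt, i.e. keylen = 16 + 4 = TIPC_AEAD_KEYLEN_MIN.
	 * The raw key bytes here are placeholders, not a real key.
	 */
	static struct tipc_aead_key *make_gcm_key(void)
	{
		static const char raw[TIPC_AEAD_KEYLEN_MIN] =
			"0123456789abcdef0123";
		struct tipc_aead_key *key;

		key = calloc(1, TIPC_AEAD_KEY_SIZE_MAX);
		if (!key)
			return NULL;
		strncpy(key->alg_name, "gcm(aes)", TIPC_AEAD_ALG_NAME - 1);
		key->keylen = sizeof(raw);	/* in bytes, within MIN..MAX */
		memcpy(key->key, raw, key->keylen);
		return key;
	}

The flexible array member at the end is why tipc_aead_key_size() exists: the total number of bytes to hand over is sizeof(struct tipc_aead_key) plus keylen, not a fixed size.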
From: Tuong L. <tuo...@de...> - 2019-11-08 05:05:33
|
When a user sets an RX key for a peer that does not yet exist on the own
node, a new node entry is needed to which the RX key can be attached.
However, since the peer node address (& capabilities) is unknown at that
moment and only the node-ID is provided, this commit allows the creation of
a node with such "preliminary" data only.

A preliminary node is not found by "tipc_node_find()", only by
"tipc_node_find_by_id()". Once the first message, i.e. LINK_CONFIG, comes
from that peer and is successfully decrypted by the own node, the actual
peer node data will be properly updated and the node will function as
usual.

In addition, the node timer always starts when a node object is created, so
if a preliminary node is not used, it will be cleaned up.

The later encryption functions will also use the node timer and be able to
create a preliminary node automatically when needed.

Acked-by: Ying Xue <yin...@wi...>
Acked-by: Jon Maloy <jon...@er...>
Signed-off-by: Tuong Lien <tuo...@de...>
---
 net/tipc/node.c | 99 +++++++++++++++++++++++++++++++++++++++++----------------
 net/tipc/node.h |  1 +
 2 files changed, 73 insertions(+), 27 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index b66d2f67b1dd..43d12a630f34 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -89,6 +89,7 @@ struct tipc_bclink_entry {
  * @links: array containing references to all links to node
  * @action_flags: bit mask of different types of node actions
  * @state: connectivity state vs peer node
+ * @preliminary: a preliminary node or not
  * @sync_point: sequence number where synch/failover is finished
  * @list: links to adjacent nodes in sorted list of cluster's nodes
  * @working_links: number of working links to node (both active and standby)
@@ -112,6 +113,7 @@ struct tipc_node {
 	int action_flags;
 	struct list_head list;
 	int state;
+	bool preliminary;
 	bool failover_sent;
 	u16 sync_point;
 	int link_cnt;
@@ -120,6 +122,7 @@ struct tipc_node {
 	u32 signature;
 	u32 link_id;
 	u8 peer_id[16];
+	char peer_id_string[NODE_ID_STR_LEN];
 	struct list_head publ_list;
 	struct list_head conn_sks;
 	unsigned long keepalive_intv;
@@ -245,6 +248,16 @@ u16 tipc_node_get_capabilities(struct net *net, u32 addr)
 	return caps;
 }
 
+u32 tipc_node_get_addr(struct tipc_node *node)
+{
+	return (node) ? node->addr : 0;
+}
+
+char *tipc_node_get_id_str(struct tipc_node *node)
+{
+	return node->peer_id_string;
+}
+
 static void tipc_node_kref_release(struct kref *kref)
 {
 	struct tipc_node *n = container_of(kref, struct tipc_node, kref);
@@ -274,7 +287,7 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr)
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
-		if (node->addr != addr)
+		if (node->addr != addr || node->preliminary)
 			continue;
 		if (!kref_get_unless_zero(&node->kref))
 			node = NULL;
@@ -400,17 +413,39 @@ static void tipc_node_assign_peer_net(struct tipc_node *n, u32 hash_mixes)
 
 static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
 					  u8 *peer_id, u16 capabilities,
-					  u32 signature, u32 hash_mixes)
+					  u32 hash_mixes, bool preliminary)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_node *n, *temp_node;
 	struct tipc_link *l;
+	unsigned long intv;
 	int bearer_id;
 	int i;
 
 	spin_lock_bh(&tn->node_list_lock);
-	n = tipc_node_find(net, addr);
+	n = tipc_node_find(net, addr) ?:
+		tipc_node_find_by_id(net, peer_id);
 	if (n) {
+		if (!n->preliminary)
+			goto update;
+		if (preliminary)
+			goto exit;
+		/* A preliminary node becomes "real" now, refresh its data */
+		tipc_node_write_lock(n);
+		n->preliminary = false;
+		n->addr = addr;
+		hlist_del_rcu(&n->hash);
+		hlist_add_head_rcu(&n->hash,
+				   &tn->node_htable[tipc_hashfn(addr)]);
+		list_del_rcu(&n->list);
+		list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+			if (n->addr < temp_node->addr)
+				break;
+		}
+		list_add_tail_rcu(&n->list, &temp_node->list);
+		tipc_node_write_unlock_fast(n);
+
+update:
 		if (n->peer_hash_mix ^ hash_mixes)
 			tipc_node_assign_peer_net(n, hash_mixes);
 		if (n->capabilities == capabilities)
@@ -438,7 +473,9 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
 		pr_warn("Node creation failed, no memory\n");
 		goto exit;
 	}
+	tipc_nodeid2string(n->peer_id_string, peer_id);
 	n->addr = addr;
+	n->preliminary = preliminary;
 	memcpy(&n->peer_id, peer_id, 16);
 	n->net = net;
 	n->peer_net = NULL;
@@ -463,22 +500,14 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
 	n->signature = INVALID_NODE_SIG;
 	n->active_links[0] = INVALID_BEARER_ID;
 	n->active_links[1] = INVALID_BEARER_ID;
-	if (!tipc_link_bc_create(net, tipc_own_addr(net),
-				 addr, U16_MAX,
-				 tipc_link_window(tipc_bc_sndlink(net)),
-				 n->capabilities,
-				 &n->bc_entry.inputq1,
-				 &n->bc_entry.namedq,
-				 tipc_bc_sndlink(net),
-				 &n->bc_entry.link)) {
-		pr_warn("Broadcast rcv link creation failed, no memory\n");
-		kfree(n);
-		n = NULL;
-		goto exit;
-	}
+	n->bc_entry.link = NULL;
 	tipc_node_get(n);
 	timer_setup(&n->timer, tipc_node_timeout, 0);
-	n->keepalive_intv = U32_MAX;
+	/* Start a slow timer anyway, crypto needs it */
+	n->keepalive_intv = 10000;
+	intv = jiffies + msecs_to_jiffies(n->keepalive_intv);
+	if (!mod_timer(&n->timer, intv))
+		tipc_node_get(n);
 	hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
 	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
 		if (n->addr < temp_node->addr)
@@ -1001,6 +1030,8 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
 {
 	struct tipc_net *tn = tipc_net(net);
 	struct tipc_node *n;
+	bool preliminary;
+	u32 sugg_addr;
 
 	/* Suggest new address if some other peer is using this one */
 	n = tipc_node_find(net, addr);
@@ -1016,9 +1047,11 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
 	/* Suggest previously used address if peer is known */
 	n = tipc_node_find_by_id(net, id);
 	if (n) {
-		addr = n->addr;
+		sugg_addr = n->addr;
+		preliminary = n->preliminary;
 		tipc_node_put(n);
-		return addr;
+		if (!preliminary)
+			return sugg_addr;
 	}
 
 	/* Even this node may be in conflict */
@@ -1035,7 +1068,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
 			  bool *respond, bool *dupl_addr)
 {
 	struct tipc_node *n;
-	struct tipc_link *l;
+	struct tipc_link *l, *snd_l;
 	struct tipc_link_entry *le;
 	bool addr_match = false;
 	bool sign_match = false;
@@ -1049,12 +1082,27 @@ void tipc_node_check_dest(struct net *net, u32 addr,
 	*dupl_addr = false;
 	*respond = false;
 
-	n = tipc_node_create(net, addr, peer_id, capabilities, signature,
-			     hash_mixes);
+	n = tipc_node_create(net, addr, peer_id, capabilities, hash_mixes,
+			     false);
 	if (!n)
		return;
 
 	tipc_node_write_lock(n);
+	if (unlikely(!n->bc_entry.link)) {
+		snd_l = tipc_bc_sndlink(net);
+		if (!tipc_link_bc_create(net, tipc_own_addr(net),
+					 addr, U16_MAX,
+					 tipc_link_window(snd_l),
+					 n->capabilities,
+					 &n->bc_entry.inputq1,
+					 &n->bc_entry.namedq, snd_l,
+					 &n->bc_entry.link)) {
+			pr_warn("Broadcast rcv link creation failed, no mem\n");
+			tipc_node_write_unlock_fast(n);
+			tipc_node_put(n);
+			return;
+		}
+	}
 
 	le = &n->links[b->identity];
@@ -2134,6 +2182,8 @@ int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 	list_for_each_entry_rcu(node, &tn->node_list, list) {
+		if (node->preliminary)
+			continue;
 		if (last_addr) {
 			if (node->addr == last_addr)
 				last_addr = 0;
@@ -2649,11 +2699,6 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
 	return skb->len;
 }
 
-u32 tipc_node_get_addr(struct tipc_node *node)
-{
-	return (node) ? node->addr : 0;
-}
-
 /**
  * tipc_node_dump - dump TIPC node data
  * @n: tipc node to be dumped
diff --git a/net/tipc/node.h b/net/tipc/node.h
index c39cd861c07d..50f8838b32c2 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -75,6 +75,7 @@ enum {
 void tipc_node_stop(struct net *net);
 bool tipc_node_get_id(struct net *net, u32 addr, u8 *id);
 u32 tipc_node_get_addr(struct tipc_node *node);
+char *tipc_node_get_id_str(struct tipc_node *node);
 u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr);
 void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128,
 			  struct tipc_bearer *bearer,
-- 
2.13.7

|
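The lookup contract this patch establishes (the address-based lookup skips preliminary nodes, while the 128-bit-ID lookup still finds them) can be modeled outside the kernel with a small sketch. Everything below is a hypothetical user-space model, not the patch's code; only the skip-preliminary rule mirrors the diff above:

	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	/* Model of a node entry: addr is 0 until the node is promoted */
	struct node {
		unsigned int addr;
		char id[16];
		bool preliminary;
	};

	/* Models tipc_node_find(): preliminary nodes are invisible here */
	static struct node *find_by_addr(struct node *tbl, int n,
					 unsigned int addr)
	{
		for (int i = 0; i < n; i++)
			if (tbl[i].addr == addr && !tbl[i].preliminary)
				return &tbl[i];
		return NULL;
	}

	/* Models tipc_node_find_by_id(): preliminary nodes are visible */
	static struct node *find_by_id(struct node *tbl, int n, const char *id)
	{
		for (int i = 0; i < n; i++)
			if (!memcmp(tbl[i].id, id, 16))
				return &tbl[i];
		return NULL;
	}

	int main(void)
	{
		struct node tbl[] = { { 0, "peer-one........", true } };

		printf("by addr: %p\n", (void *)find_by_addr(tbl, 1, 0));
		printf("by id  : %p\n",
		       (void *)find_by_id(tbl, 1, "peer-one........"));
		return 0;
	}

The first lookup returns NULL while the second finds the entry, which is exactly why an RX key can be attached by node-ID before the peer's address is known, and why the node later has to be "promoted" in tipc_node_create() when the real address arrives.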
From: Tuong L. <tuo...@de...> - 2019-11-08 05:05:32
|
This series provides the TIPC encryption feature, kernel part. There will
be another one in 'iproute2/tipc' for user space to set the key.

v2: add select crypto 'gcm(aes)' for TIPC_CRYPTO in Kconfig

Tuong Lien (5):
  tipc: add reference counter to bearer
  tipc: enable creating a "preliminary" node
  tipc: add new AEAD key structure for user API
  tipc: introduce TIPC encryption & authentication
  tipc: add support for AEAD key setting via netlink

 include/uapi/linux/tipc.h         |   21 +
 include/uapi/linux/tipc_netlink.h |    4 +
 net/tipc/Kconfig                  |   15 +
 net/tipc/Makefile                 |    1 +
 net/tipc/bcast.c                  |    2 +-
 net/tipc/bearer.c                 |   49 +-
 net/tipc/bearer.h                 |    6 +-
 net/tipc/core.c                   |   14 +
 net/tipc/core.h                   |    8 +
 net/tipc/crypto.c                 | 1986 +++++++++++++++++++++++++++++++++++++
 net/tipc/crypto.h                 |  167 ++++
 net/tipc/link.c                   |   19 +-
 net/tipc/link.h                   |    1 +
 net/tipc/msg.c                    |   15 +-
 net/tipc/msg.h                    |   46 +-
 net/tipc/netlink.c                |   18 +-
 net/tipc/node.c                   |  325 +++++-
 net/tipc/node.h                   |   13 +
 net/tipc/sysctl.c                 |   11 +
 net/tipc/udp_media.c              |    1 +
 20 files changed, 2651 insertions(+), 71 deletions(-)
 create mode 100644 net/tipc/crypto.c
 create mode 100644 net/tipc/crypto.h

-- 
2.13.7

|
From: Tuong L. <tuo...@de...> - 2019-11-08 05:05:32
|
To support the asynchronous crypto operations in the later commits, apart
from the current RCU mechanism for the bearer pointer, we add a 'refcnt' to
the bearer object as well. A bearer can then be held via
'tipc_bearer_hold()' without being freed, even though the bearer or
interface may be disabled in the meantime. If that happens, the bearer will
be released once the crypto operation is completed and 'tipc_bearer_put()'
is called.

Acked-by: Ying Xue <yin...@wi...>
Acked-by: Jon Maloy <jon...@er...>
Signed-off-by: Tuong Lien <tuo...@de...>
---
 net/tipc/bearer.c | 14 +++++++++++++-
 net/tipc/bearer.h |  3 +++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 0214aa1c4427..6e15b9b1f1ef 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -315,6 +315,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
 	b->net_plane = bearer_id + 'A';
 	b->priority = prio;
 	test_and_set_bit_lock(0, &b->up);
+	refcount_set(&b->refcnt, 1);
 
 	res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
 	if (res) {
@@ -351,6 +352,17 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
 	return 0;
 }
 
+bool tipc_bearer_hold(struct tipc_bearer *b)
+{
+	return (b && refcount_inc_not_zero(&b->refcnt));
+}
+
+void tipc_bearer_put(struct tipc_bearer *b)
+{
+	if (b && refcount_dec_and_test(&b->refcnt))
+		kfree_rcu(b, rcu);
+}
+
 /**
  * bearer_disable
  *
@@ -369,7 +381,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b)
 	if (b->disc)
 		tipc_disc_delete(b->disc);
 	RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL);
-	kfree_rcu(b, rcu);
+	tipc_bearer_put(b);
 	tipc_mon_delete(net, bearer_id);
 }
 
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index ea0f3c49cbed..faca696d422f 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -165,6 +165,7 @@ struct tipc_bearer {
 	struct tipc_discoverer *disc;
 	char net_plane;
 	unsigned long up;
+	refcount_t refcnt;
 };
 
 struct tipc_bearer_names {
@@ -210,6 +211,8 @@ int tipc_media_set_window(const char *name, u32 new_value);
 int tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
 int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
 			 struct nlattr *attrs[]);
+bool tipc_bearer_hold(struct tipc_bearer *b);
+void tipc_bearer_put(struct tipc_bearer *b);
 void tipc_disable_l2_media(struct tipc_bearer *b);
 int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
 		     struct tipc_bearer *b, struct tipc_media_addr *dest);
-- 
2.13.7

|
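The intended usage pattern (take a reference before starting an asynchronous operation, drop it in the completion path) can be modeled outside the kernel roughly as follows. This is a sketch with stand-in types and a plain int instead of refcount_t; the kernel primitives it imitates are only named in comments:

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct bearer {				/* stand-in for tipc_bearer */
		int refcnt;
	};

	static bool bearer_hold(struct bearer *b)
	{
		if (!b || b->refcnt <= 0)	/* models refcount_inc_not_zero() */
			return false;
		b->refcnt++;
		return true;
	}

	static void bearer_put(struct bearer *b)
	{
		if (b && --b->refcnt == 0) {	/* models refcount_dec_and_test() */
			printf("bearer freed\n");	/* kfree_rcu() in the kernel */
			free(b);
		}
	}

	int main(void)
	{
		struct bearer *b = calloc(1, sizeof(*b));

		if (!b)
			return 1;
		b->refcnt = 1;		/* initial ref, set at enable time */
		if (bearer_hold(b)) {	/* async crypto op takes its own ref */
			bearer_put(b);	/* bearer is disabled meanwhile */
			bearer_put(b);	/* completion drops the last ref */
		}
		return 0;
	}

The point of the design is that disabling the bearer only drops the initial reference; the object survives until the in-flight crypto completion calls the final put, so the callback never touches freed memory.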
From: Tuong L. T. <tuo...@de...> - 2019-11-08 04:37:03
|
You are right, David. I am going to resend the v2 series with an update for
it.

Thanks/Tuong

-----Original Message-----
From: David Miller <da...@da...>
Sent: Friday, November 8, 2019 11:07 AM
To: tuo...@de...
Cc: jon...@er...; ma...@do...; yin...@wi...; ne...@vg...;
tip...@li...
Subject: Re: [net-next 0/5] TIPC encryption

From: Tuong Lien <tuo...@de...>
Date: Fri, 8 Nov 2019 08:42:08 +0700

> This series provides the TIPC encryption feature, kernel part. There will
> be another one in 'iproute2/tipc' for user space to set the key.

If gcm(aes) is the only algorithm you accept, you will need to express
this dependency in the Kconfig file.

Otherwise it is pointless to turn on the TIPC crypto Kconfig option.

|
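For reference, the dependency David asks for could look roughly like the sketch below. The TIPC_CRYPTO option name and the net/tipc/Kconfig location come from the cover letter; the exact select lines and help text are assumptions about what the v2 change might contain, not the actual patch:

	config TIPC_CRYPTO
		bool "TIPC encryption support"
		depends on TIPC
		select CRYPTO
		select CRYPTO_AES
		select CRYPTO_GCM
		help
		  Saying Y here will enable support for TIPC encryption.
		  All TIPC messages will be encrypted/decrypted by using
		  the currently most advanced algorithm: AEAD GCM (Galois/
		  Counter Mode) with the AES key.

Selecting CRYPTO_AES and CRYPTO_GCM is what guarantees that the "gcm(aes)" transform is actually available when the option is turned on, which is the pointlessness David is referring to.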
From: David M. <da...@da...> - 2019-11-08 04:09:13
|
From: Hoang Le <hoa...@de...>
Date: Fri, 8 Nov 2019 10:02:37 +0700

> Currently, we scan over all network namespaces at each received
> discovery message in order to check if the sending peer might be
> present in a host-local namespace.
>
> This is unnecessary since we can assume that a peer will not change its
> location during an established session.
>
> We now improve the condition for this testing so that we don't perform
> any redundant scans.
>
> Fixes: f73b12812a3d ("tipc: improve throughput between nodes in netns")
> Acked-by: Jon Maloy <jon...@er...>
> Signed-off-by: Hoang Le <hoa...@de...>

Applied, thank you.

|
From: David M. <da...@da...> - 2019-11-08 04:07:12
|
From: Tuong Lien <tuo...@de...>
Date: Fri, 8 Nov 2019 08:42:08 +0700

> This series provides the TIPC encryption feature, kernel part. There will
> be another one in 'iproute2/tipc' for user space to set the key.

If gcm(aes) is the only algorithm you accept, you will need to express
this dependency in the Kconfig file.

Otherwise it is pointless to turn on the TIPC crypto Kconfig option.

|
From: Hoang Le <hoa...@de...> - 2019-11-08 03:02:59
|
Currently, we scan over all network namespaces at each received discovery
message in order to check if the sending peer might be present in a
host-local namespace.

This is unnecessary since we can assume that a peer will not change its
location during an established session.

We now improve the condition for this testing so that we don't perform any
redundant scans.

Fixes: f73b12812a3d ("tipc: improve throughput between nodes in netns")
Acked-by: Jon Maloy <jon...@er...>
Signed-off-by: Hoang Le <hoa...@de...>
---
 net/tipc/node.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 1f1584518221..b66d2f67b1dd 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -472,10 +472,6 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
 				 tipc_bc_sndlink(net),
 				 &n->bc_entry.link)) {
 		pr_warn("Broadcast rcv link creation failed, no memory\n");
-		if (n->peer_net) {
-			n->peer_net = NULL;
-			n->peer_hash_mix = 0;
-		}
 		kfree(n);
 		n = NULL;
 		goto exit;
@@ -1073,6 +1069,9 @@ void tipc_node_check_dest(struct net *net, u32 addr,
 	if (sign_match && addr_match && link_up) {
 		/* All is fine. Do nothing. */
 		reset = false;
+		/* Peer node is not a container/local namespace */
+		if (!n->peer_hash_mix)
+			n->peer_hash_mix = hash_mixes;
 	} else if (sign_match && addr_match && !link_up) {
 		/* Respond. The link will come up in due time */
 		*respond = true;
@@ -1398,11 +1397,8 @@ static void node_lost_contact(struct tipc_node *n,
 
 	/* Notify publications from this node */
 	n->action_flags |= TIPC_NOTIFY_NODE_DOWN;
-
-	if (n->peer_net) {
-		n->peer_net = NULL;
-		n->peer_hash_mix = 0;
-	}
+	n->peer_net = NULL;
+	n->peer_hash_mix = 0;
 
 	/* Notify sockets connected to node */
 	list_for_each_entry_safe(conn, safe, conns, list) {
 		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
-- 
2.20.1

|