You can subscribe to this list here.
2003 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
(6) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2004 |
Jan
(9) |
Feb
(11) |
Mar
(22) |
Apr
(73) |
May
(78) |
Jun
(146) |
Jul
(80) |
Aug
(27) |
Sep
(5) |
Oct
(14) |
Nov
(18) |
Dec
(27) |
2005 |
Jan
(20) |
Feb
(30) |
Mar
(19) |
Apr
(28) |
May
(50) |
Jun
(31) |
Jul
(32) |
Aug
(14) |
Sep
(36) |
Oct
(43) |
Nov
(74) |
Dec
(63) |
2006 |
Jan
(34) |
Feb
(32) |
Mar
(21) |
Apr
(76) |
May
(106) |
Jun
(72) |
Jul
(70) |
Aug
(175) |
Sep
(130) |
Oct
(39) |
Nov
(81) |
Dec
(43) |
2007 |
Jan
(81) |
Feb
(36) |
Mar
(20) |
Apr
(43) |
May
(54) |
Jun
(34) |
Jul
(44) |
Aug
(55) |
Sep
(44) |
Oct
(54) |
Nov
(43) |
Dec
(41) |
2008 |
Jan
(42) |
Feb
(84) |
Mar
(73) |
Apr
(30) |
May
(119) |
Jun
(54) |
Jul
(54) |
Aug
(93) |
Sep
(173) |
Oct
(130) |
Nov
(145) |
Dec
(153) |
2009 |
Jan
(59) |
Feb
(12) |
Mar
(28) |
Apr
(18) |
May
(56) |
Jun
(9) |
Jul
(28) |
Aug
(62) |
Sep
(16) |
Oct
(19) |
Nov
(15) |
Dec
(17) |
2010 |
Jan
(14) |
Feb
(36) |
Mar
(37) |
Apr
(30) |
May
(33) |
Jun
(53) |
Jul
(42) |
Aug
(50) |
Sep
(67) |
Oct
(66) |
Nov
(69) |
Dec
(36) |
2011 |
Jan
(52) |
Feb
(45) |
Mar
(49) |
Apr
(21) |
May
(34) |
Jun
(13) |
Jul
(19) |
Aug
(37) |
Sep
(43) |
Oct
(10) |
Nov
(23) |
Dec
(30) |
2012 |
Jan
(42) |
Feb
(36) |
Mar
(46) |
Apr
(25) |
May
(96) |
Jun
(146) |
Jul
(40) |
Aug
(28) |
Sep
(61) |
Oct
(45) |
Nov
(100) |
Dec
(53) |
2013 |
Jan
(79) |
Feb
(24) |
Mar
(134) |
Apr
(156) |
May
(118) |
Jun
(75) |
Jul
(278) |
Aug
(145) |
Sep
(136) |
Oct
(168) |
Nov
(137) |
Dec
(439) |
2014 |
Jan
(284) |
Feb
(158) |
Mar
(231) |
Apr
(275) |
May
(259) |
Jun
(91) |
Jul
(222) |
Aug
(215) |
Sep
(165) |
Oct
(166) |
Nov
(211) |
Dec
(150) |
2015 |
Jan
(164) |
Feb
(324) |
Mar
(299) |
Apr
(214) |
May
(111) |
Jun
(109) |
Jul
(105) |
Aug
(36) |
Sep
(58) |
Oct
(131) |
Nov
(68) |
Dec
(30) |
2016 |
Jan
(46) |
Feb
(87) |
Mar
(135) |
Apr
(174) |
May
(132) |
Jun
(135) |
Jul
(149) |
Aug
(125) |
Sep
(79) |
Oct
(49) |
Nov
(95) |
Dec
(102) |
2017 |
Jan
(104) |
Feb
(75) |
Mar
(72) |
Apr
(53) |
May
(18) |
Jun
(5) |
Jul
(14) |
Aug
(19) |
Sep
(2) |
Oct
(13) |
Nov
(21) |
Dec
(67) |
2018 |
Jan
(56) |
Feb
(50) |
Mar
(148) |
Apr
(41) |
May
(37) |
Jun
(34) |
Jul
(34) |
Aug
(11) |
Sep
(52) |
Oct
(48) |
Nov
(28) |
Dec
(46) |
2019 |
Jan
(29) |
Feb
(63) |
Mar
(95) |
Apr
(54) |
May
(14) |
Jun
(71) |
Jul
(60) |
Aug
(49) |
Sep
(3) |
Oct
(64) |
Nov
(115) |
Dec
(57) |
2020 |
Jan
(15) |
Feb
(9) |
Mar
(38) |
Apr
(27) |
May
(60) |
Jun
(53) |
Jul
(35) |
Aug
(46) |
Sep
(37) |
Oct
(64) |
Nov
(20) |
Dec
(25) |
2021 |
Jan
(20) |
Feb
(31) |
Mar
(27) |
Apr
(23) |
May
(21) |
Jun
(30) |
Jul
(30) |
Aug
(7) |
Sep
(18) |
Oct
|
Nov
(15) |
Dec
(4) |
2022 |
Jan
(3) |
Feb
(1) |
Mar
(10) |
Apr
|
May
(2) |
Jun
(26) |
Jul
(5) |
Aug
|
Sep
(1) |
Oct
(2) |
Nov
(9) |
Dec
(2) |
2023 |
Jan
(4) |
Feb
(4) |
Mar
(5) |
Apr
(10) |
May
(29) |
Jun
(17) |
Jul
|
Aug
|
Sep
(1) |
Oct
(1) |
Nov
(2) |
Dec
|
2024 |
Jan
|
Feb
(6) |
Mar
|
Apr
(1) |
May
(6) |
Jun
|
Jul
(5) |
Aug
|
Sep
(3) |
Oct
|
Nov
|
Dec
|
2025 |
Jan
|
Feb
(3) |
Mar
|
Apr
|
May
|
Jun
|
Jul
(6) |
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: Xin L. <luc...@gm...> - 2021-07-06 18:22:52
|
TIPC GSO is implemented in the skb frag_list way, as SCTP does. We don't need to change much in the tx path, but only create a head skb and append the skbs when there is more than one skb ready to send. In the lower-layer gso_segment(), it does fragmentation by copying the eth header or ip/udp header to each skb in the head_skb's frag_list and sending them one by one. This works with both eth media and udp media. Signed-off-by: Xin Long <luc...@gm...> --- net/tipc/bearer.c | 23 +++++++++++++++++++++-- net/tipc/msg.h | 1 + net/tipc/offload.c | 41 +++++++++++++++++++++++++++++++++++++++++ net/tipc/udp_media.c | 7 +++++++ 4 files changed, 70 insertions(+), 2 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 443f8e5b9477..b0321b21bfdc 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -570,8 +570,9 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct tipc_media_addr *dst, struct tipc_node *__dnode) { + struct sk_buff *head = NULL, *skb, *tmp; struct tipc_bearer *b; - struct sk_buff *skb, *tmp; + u16 segs = 0; if (skb_queue_empty(xmitq)) return; @@ -585,13 +586,31 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) { #ifdef CONFIG_TIPC_CRYPTO tipc_crypto_xmit(net, &skb, b, dst, __dnode); - if (skb) + if (!skb) + continue; #endif + if (!skb->ignore_df) { /* PLPMTUD probe packet*/ b->media->send_msg(net, skb, b, dst); + continue; + } + if (!head) { + segs = 1; + head = skb; + continue; + } + if (tipc_msg_gso_append(&head, skb, segs)) { + segs++; + continue; + } + b->media->send_msg(net, head, b, dst); + segs = 1; + head = skb; } else { kfree_skb(skb); } } + if (head) + b->media->send_msg(net, head, b, dst); rcu_read_unlock(); } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d6c6231b8208..4d1ff666790c 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1205,6 +1205,7 @@ bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, bool tipc_msg_skb_clone(struct 
sk_buff_head *msg, struct sk_buff_head *cpy); void tipc_offload_init(void); void tipc_offload_exit(void); +bool tipc_msg_gso_append(struct sk_buff **p, struct sk_buff *skb, u16 segs); static inline u16 buf_seqno(struct sk_buff *skb) { diff --git a/net/tipc/offload.c b/net/tipc/offload.c index f8a81c8886f0..d137679f4db0 100644 --- a/net/tipc/offload.c +++ b/net/tipc/offload.c @@ -18,6 +18,47 @@ static struct packet_offload tipc_packet_offload __read_mostly = { }, }; +bool tipc_msg_gso_append(struct sk_buff **p, struct sk_buff *skb, u16 segs) +{ + struct sk_buff *head = *p; + struct sk_buff *nskb; + + if (head->len + skb->len >= 65535) + return false; + + if (segs == 1) { + nskb = tipc_buf_acquire(0, GFP_ATOMIC); + if (!nskb) + return false; + + nskb->ip_summed = CHECKSUM_UNNECESSARY; + nskb->truesize += head->truesize; + nskb->data_len += head->len; + nskb->len += head->len; + TIPC_SKB_CB(nskb)->tail = head; + + skb_shinfo(nskb)->frag_list = head; + skb_shinfo(nskb)->gso_segs = 1; + skb_shinfo(nskb)->gso_type = SKB_GSO_TIPC; + skb_shinfo(nskb)->gso_size = GSO_BY_FRAGS; + skb_reset_network_header(head); + + head = nskb; + *p = head; + } + + head->truesize += skb->truesize; + head->data_len += skb->len; + head->len += skb->len; + TIPC_SKB_CB(head)->tail->next = skb; + TIPC_SKB_CB(head)->tail = skb; + + skb_shinfo(head)->gso_segs++; + skb_reset_network_header(skb); + + return true; +} + void tipc_offload_init(void) { dev_add_offload(&tipc_packet_offload); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 5078c5b19e81..7da02db6a50e 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -245,6 +245,13 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, goto out; } + if (skb_is_gso(skb)) + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + + skb->encapsulation = 1; + skb_reset_inner_mac_header(skb); + skb_reset_inner_network_header(skb); + skb_reset_inner_transport_header(skb); skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); ub 
= rcu_dereference(b->media_ptr); if (!ub) { -- 2.27.0 |
From: Xin L. <luc...@gm...> - 2021-07-06 18:22:46
|
Since there are not enough bits in netdev_features_t for NETIF_F_GSO_TIPC_BIT, and tipc is using code similar to sctp's, this patch reuses SKB_GSO_SCTP and NETIF_F_GSO_SCTP_BIT for tipc. Signed-off-by: Xin Long <luc...@gm...> --- include/linux/skbuff.h | 2 -- net/tipc/node.c | 15 ++++++++++++++- net/tipc/offload.c | 4 ++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 148bf0ed7336..b2db9cd9a73f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -599,8 +599,6 @@ enum { SKB_GSO_UDP_L4 = 1 << 17, SKB_GSO_FRAGLIST = 1 << 18, - - SKB_GSO_TIPC = 1 << 19, }; #if BITS_PER_LONG > 32 diff --git a/net/tipc/node.c b/net/tipc/node.c index 9947b7dfe1d2..17e59c8dac31 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2068,7 +2068,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. 
*/ -void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) +static void __tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) { struct sk_buff_head xmitq; struct tipc_link_entry *le; @@ -2189,6 +2189,19 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) kfree_skb(skb); } +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) +{ + struct sk_buff *seg, *next; + + if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + return __tipc_rcv(net, skb, b); + + skb_list_walk_safe(skb_shinfo(skb)->frag_list, seg, next) + __tipc_rcv(net, seg, b); + skb_shinfo(skb)->frag_list = NULL; + consume_skb(skb); +} + void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, int prop) { diff --git a/net/tipc/offload.c b/net/tipc/offload.c index d137679f4db0..26e372178635 100644 --- a/net/tipc/offload.c +++ b/net/tipc/offload.c @@ -5,7 +5,7 @@ static struct sk_buff *tipc_gso_segment(struct sk_buff *skb, netdev_features_t features) { - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TIPC)) + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) return ERR_PTR(-EINVAL); return skb_segment(skb, (features | NETIF_F_HW_CSUM) & ~NETIF_F_SG); @@ -39,7 +39,7 @@ bool tipc_msg_gso_append(struct sk_buff **p, struct sk_buff *skb, u16 segs) skb_shinfo(nskb)->frag_list = head; skb_shinfo(nskb)->gso_segs = 1; - skb_shinfo(nskb)->gso_type = SKB_GSO_TIPC; + skb_shinfo(nskb)->gso_type = SKB_GSO_SCTP; skb_shinfo(nskb)->gso_size = GSO_BY_FRAGS; skb_reset_network_header(head); -- 2.27.0 |
From: Xin L. <luc...@gm...> - 2021-07-06 18:22:43
|
This patch receives and processes the probe ack by checking msg_max_pkt() == l->pl.probe_size, and then performs the state transition in tipc_link_pl_recv(). For the details, see: https://lwn.net/Articles/860385/ Signed-off-by: Xin Long <luc...@gm...> --- net/tipc/link.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index 3af6c04f82c2..241c9378e258 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -293,6 +293,7 @@ static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, static void tipc_link_update_cwin(struct tipc_link *l, int released, bool retransmitted); static void tipc_link_pl_send(struct tipc_link *l); +static void tipc_link_pl_recv(struct tipc_link *l); /* * Simple non-static link routines (i.e. referenced outside this file) */ @@ -2333,6 +2334,13 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, break; } + if (!reply && msg_max_pkt(hdr) == l->pl.probe_size) { + tipc_link_pl_recv(l); + if (l->pl.state == TIPC_PL_COMPLETE) + break; + tipc_link_build_proto_msg(l, STATE_MSG, PROBE_PLPMTU, 0, 0, 0, 0, xmitq); + } + /* Receive Gap ACK blocks from peer if any */ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true); @@ -3061,3 +3069,43 @@ static void tipc_link_pl_send(struct tipc_link *l) } l->pl.count = TIPC_PROBE_INTERVAL; } + +static void tipc_link_pl_recv(struct tipc_link *l) +{ + pr_debug("%s: PLPMTUD: link: %p, state: %d, pmtu: %d, size: %d, high: %d\n", + __func__, l, l->pl.state, l->pl.pmtu, l->pl.probe_size, l->pl.probe_high); + + l->pl.pmtu = l->pl.probe_size; + l->pl.count = 0; + if (l->pl.state == TIPC_PL_BASE) { + l->pl.state = TIPC_PL_SEARCH; /* Base -> Search */ + l->pl.probe_size += TIPC_PL_BIG_STEP; + } else if (l->pl.state == TIPC_PL_ERROR) { + l->pl.state = TIPC_PL_SEARCH; /* Error -> Search */ + + l->pl.pmtu = l->pl.probe_size; + l->mtu = l->pl.pmtu; + l->pl.probe_size += TIPC_PL_BIG_STEP; + } else if (l->pl.state == 
TIPC_PL_SEARCH) { + if (!l->pl.probe_high) { + l->pl.probe_size = min(l->pl.probe_size + TIPC_PL_BIG_STEP, + TIPC_MAX_PLPMTU); + return; + } + l->pl.probe_size += TIPC_PL_MIN_STEP; + if (l->pl.probe_size >= l->pl.probe_high) { + l->pl.probe_high = 0; + l->pl.raise = 0; + l->pl.state = TIPC_PL_COMPLETE; /* Search -> Search Complete */ + + l->pl.probe_size = l->pl.pmtu; + l->mtu = l->pl.pmtu; + } + } else if (l->pl.state == TIPC_PL_COMPLETE) { + l->pl.raise++; + if (l->pl.raise == 30) { + l->pl.state = TIPC_PL_SEARCH; /* Search Complete -> Search */ + l->pl.probe_size += TIPC_PL_MIN_STEP; + } + } +} -- 2.27.0 |
From: Xin L. <luc...@gm...> - 2021-07-06 18:22:43
|
This is the base code for tipc gso, and tipc_gso_segment() will only be called after gso packets are built in the next patch. Signed-off-by: Xin Long <luc...@gm...> --- include/linux/skbuff.h | 2 ++ net/tipc/Makefile | 2 +- net/tipc/core.c | 3 +++ net/tipc/msg.h | 2 ++ net/tipc/offload.c | 29 +++++++++++++++++++++++++++++ 5 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 net/tipc/offload.c diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b2db9cd9a73f..148bf0ed7336 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -599,6 +599,8 @@ enum { SKB_GSO_UDP_L4 = 1 << 17, SKB_GSO_FRAGLIST = 1 << 18, + + SKB_GSO_TIPC = 1 << 19, }; #if BITS_PER_LONG > 32 diff --git a/net/tipc/Makefile b/net/tipc/Makefile index ee49a9f1dd4f..ff276bf78d03 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -9,7 +9,7 @@ tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o monitor.o name_table.o net.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ - topsrv.o group.o trace.o + topsrv.o group.o trace.o offload.o CFLAGS_trace.o += -I$(src) diff --git a/net/tipc/core.c b/net/tipc/core.c index 3f4542e0f065..1f59371aa036 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -186,6 +186,8 @@ static int __init tipc_init(void) if (err) goto out_netlink_compat; + tipc_offload_init(); + pr_info("Started in single node mode\n"); return 0; @@ -210,6 +212,7 @@ static int __init tipc_init(void) static void __exit tipc_exit(void) { + tipc_offload_exit(); tipc_netlink_compat_stop(); tipc_netlink_stop(); tipc_bearer_cleanup(); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 64ae4c4c44f8..d6c6231b8208 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1203,6 +1203,8 @@ bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, struct sk_buff *skb); bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy); +void 
tipc_offload_init(void); +void tipc_offload_exit(void); static inline u16 buf_seqno(struct sk_buff *skb) { diff --git a/net/tipc/offload.c b/net/tipc/offload.c new file mode 100644 index 000000000000..f8a81c8886f0 --- /dev/null +++ b/net/tipc/offload.c @@ -0,0 +1,29 @@ +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include "msg.h" + +static struct sk_buff *tipc_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TIPC)) + return ERR_PTR(-EINVAL); + + return skb_segment(skb, (features | NETIF_F_HW_CSUM) & ~NETIF_F_SG); +} + +static struct packet_offload tipc_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_TIPC), + .callbacks = { + .gso_segment = tipc_gso_segment, + }, +}; + +void tipc_offload_init(void) +{ + dev_add_offload(&tipc_packet_offload); +} + +void tipc_offload_exit(void) +{ + dev_remove_offload(&tipc_packet_offload); +} -- 2.27.0 |
From: Xin L. <luc...@gm...> - 2021-07-06 18:22:33
|
pl.count implements a timer that expires every '10 * node timer interval'; on expiry it performs the state transition in tipc_link_pl_send() and sends a probe via tipc_link_build_proto_msg(). For the details, see: https://lwn.net/Articles/860385/ Signed-off-by: Xin Long <luc...@gm...> --- net/tipc/link.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index 414f9cf543ff..3af6c04f82c2 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -292,6 +292,7 @@ static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, bool *retransmitted, int *rc); static void tipc_link_update_cwin(struct tipc_link *l, int released, bool retransmitted); +static void tipc_link_pl_send(struct tipc_link *l); /* * Simple non-static link routines (i.e. referenced outside this file) */ @@ -902,6 +903,14 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) if (state || probe || setup) tipc_link_build_proto_msg(l, mtyp, PROBE_MSTATE, 0, 0, 0, 0, xmitq); + if (probe && tipc_link_is_up(l)) { + l->pl.count++; + if (!(l->pl.count % TIPC_PROBE_INTERVAL)) { + tipc_link_pl_send(l); + tipc_link_build_proto_msg(l, mtyp, PROBE_PLPMTU, 0, 0, 0, 0, xmitq); + } + } + return rc; } @@ -3013,3 +3022,42 @@ int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf) return i; } + +static void tipc_link_pl_send(struct tipc_link *l) +{ + pr_debug("%s: PLPMTUD: link: %p, state: %d, pmtu: %d, size: %d, high: %d\n", + __func__, l, l->pl.state, l->pl.pmtu, l->pl.probe_size, l->pl.probe_high); + + if (l->pl.count <= TIPC_MAX_PROBES * TIPC_PROBE_INTERVAL) + return; + + if (l->pl.state == TIPC_PL_BASE) { + if (l->pl.probe_size == TIPC_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ + l->pl.state = TIPC_PL_ERROR; /* Base -> Error */ + + l->pl.pmtu = TIPC_MIN_PLPMTU; + l->mtu = l->pl.pmtu; + } + } else if (l->pl.state == TIPC_PL_SEARCH) { + if (l->pl.pmtu == l->pl.probe_size) { /* Black Hole 
Detected */ + l->pl.state = TIPC_PL_BASE; /* Search -> Base */ + l->pl.probe_size = TIPC_BASE_PLPMTU; + l->pl.probe_high = 0; + + l->pl.pmtu = TIPC_BASE_PLPMTU; + l->mtu = l->pl.pmtu; + } else { /* Normal probe failure. */ + l->pl.probe_high = l->pl.probe_size; + l->pl.probe_size = l->pl.pmtu; + } + } else if (l->pl.state == TIPC_PL_COMPLETE) { + if (l->pl.pmtu == l->pl.probe_size) { /* Black Hole Detected */ + l->pl.state = TIPC_PL_BASE; /* Search Complete -> Base */ + l->pl.probe_size = TIPC_BASE_PLPMTU; + + l->pl.pmtu = TIPC_BASE_PLPMTU; + l->mtu = l->pl.pmtu; + } + } + l->pl.count = TIPC_PROBE_INTERVAL; +} -- 2.27.0 |
From: Xin L. <luc...@gm...> - 2021-07-06 18:22:33
|
This patch is to adjust the code in tipc_link_build_proto_msg() to make it able to build probe packet with a specific size for sender, and probe reply packet with mtu set. Note that to send the probe packet, the df flag has to be set. Signed-off-by: Xin Long <luc...@gm...> --- net/tipc/link.c | 38 +++++++++++++++++++++++--------------- net/tipc/link.h | 9 +++++++++ net/tipc/msg.c | 1 + net/tipc/udp_media.c | 3 ++- 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 1aa775cef3bb..414f9cf543ff 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -273,8 +273,8 @@ static int link_is_up(struct tipc_link *l) static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); -static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, - bool probe_reply, u16 rcvgap, +static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, + u8 ptype, u32 mtu, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); static void link_print(struct tipc_link *l, const char *str); @@ -900,7 +900,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) } if (state || probe || setup) - tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, mtyp, PROBE_MSTATE, 0, 0, 0, 0, xmitq); return rc; } @@ -1862,8 +1862,8 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, return rc; } -static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, - bool probe_reply, u16 rcvgap, +static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, + u8 ptype, u32 mtu, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) { @@ -1874,7 +1874,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, struct sk_buff *skb; bool node_up = link_is_up(bcl); u16 glen = 0, bc_rcvgap = 0; - int dlen = 0; + int dlen = 0, msg_sz; void *data; /* Don't send 
protocol message during reset or link failover */ @@ -1884,11 +1884,13 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (!tipc_link_is_up(l) && (mtyp == STATE_MSG)) return; - if ((probe || probe_reply) && !skb_queue_empty(dfq)) + if (ptype && !skb_queue_empty(dfq)) rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; - skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, - tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ, + msg_sz = tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ; + if (ptype == PROBE_PLPMTU) + msg_sz = l->pl.probe_size - INT_H_SIZE; + skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, msg_sz, l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!skb) return; @@ -1915,13 +1917,19 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_seq_gap(hdr, rcvgap); bc_rcvgap = link_bc_rcv_gap(bcl); msg_set_bc_gap(hdr, bc_rcvgap); - msg_set_probe(hdr, probe); - msg_set_is_keepalive(hdr, probe || probe_reply); + msg_set_probe(hdr, ptype == PROBE_MSTATE || ptype == PROBE_PLPMTU); + msg_set_is_keepalive(hdr, !!ptype); + if (ptype == PROBE_REPLY) + msg_set_max_pkt(hdr, mtu); if (l->peer_caps & TIPC_GAP_ACK_BLOCK) glen = tipc_build_gap_ack_blks(l, hdr); tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id); - msg_set_size(hdr, INT_H_SIZE + glen + dlen); - skb_trim(skb, INT_H_SIZE + glen + dlen); + if (ptype != PROBE_PLPMTU) { + msg_set_size(hdr, INT_H_SIZE + glen + dlen); + skb_trim(skb, INT_H_SIZE + glen + dlen); + } else { + skb->ignore_df = 0; + } l->stats.sent_states++; l->rcv_unacked = 0; } else { @@ -1935,7 +1943,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME); } - if (probe) + if (ptype == PROBE_MSTATE || ptype == PROBE_PLPMTU) l->stats.sent_probes++; if (rcvgap) l->stats.sent_nacks++; @@ -2329,7 +2337,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, 
struct sk_buff *skb, skb_queue_empty(&l->deferdq)) rcvgap = peers_snd_nxt - l->rcv_nxt; if (rcvgap || reply) - tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, + tipc_link_build_proto_msg(l, STATE_MSG, PROBE_REPLY, msg_size(hdr), rcvgap, 0, 0, xmitq); released = tipc_link_advance_transmq(l, l, ack, gap, ga, xmitq, diff --git a/net/tipc/link.h b/net/tipc/link.h index 30bee2562987..87b3ebe5b91d 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -66,6 +66,15 @@ enum { TIPC_LINK_SND_STATE = (1 << 2) }; +/* Probe Type + */ +enum { + PROBE_NONE, + PROBE_MSTATE, + PROBE_REPLY, + PROBE_PLPMTU, +}; + /* PLPMTUD state */ enum { diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 5c9fd4791c4b..6d8bcc180f8b 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -75,6 +75,7 @@ struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) skb_put(skb, size); skb->next = NULL; } + skb->ignore_df = 1; return skb; } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index dc4bae965549..5078c5b19e81 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -174,6 +174,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, local_bh_disable(); ndst = dst_cache_get(cache); if (dst->proto == htons(ETH_P_IP)) { + u8 df = skb->ignore_df ? 0 : htons(IP_DF); struct rtable *rt = (struct rtable *)ndst; if (!rt) { @@ -193,7 +194,7 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, ttl = ip4_dst_hoplimit(&rt->dst); udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, - dst->ipv4.s_addr, 0, ttl, 0, src->port, + dst->ipv4.s_addr, 0, ttl, df, src->port, dst->port, false, true); #if IS_ENABLED(CONFIG_IPV6) } else { -- 2.27.0 |
From: Xin L. <luc...@gm...> - 2021-07-06 18:22:32
|
These are 4 constants described in rfc8899#section-5.1.2: MAX_PROBES, MIN_PLPMTU, MAX_PLPMTU, BASE_PLPMTU; And 2 variables described in rfc8899#section-5.1.3: PROBED_SIZE, PROBE_COUNT; And 5 states described in rfc8899#section-5.2: DISABLED, BASE, SEARCH, SEARCH_COMPLETE, ERROR; 'count' and 'raise' are used for two timers' counting: PROBE_TIMER and PMTU_RAISE_TIMER. 'probe_high' is used for finding the optimal value for pmtu. Signed-off-by: Xin Long <luc...@gm...> --- net/tipc/link.c | 13 +++++++++++++ net/tipc/link.h | 20 ++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index cf586840caeb..1aa775cef3bb 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -182,6 +182,14 @@ struct tipc_link { /* Max packet negotiation */ u16 mtu; u16 advertised_mtu; + struct { + u16 probe_size; + u16 probe_high; + u16 pmtu; + u8 count; + u8 state:3; + u8 raise:5; + } pl; /* Sending */ struct sk_buff_head transmq; @@ -984,6 +992,11 @@ void tipc_link_reset(struct tipc_link *l) l->peer_session--; l->session++; l->mtu = l->advertised_mtu; + l->pl.state = TIPC_PL_BASE; + l->pl.pmtu = TIPC_BASE_PLPMTU; + l->pl.probe_size = TIPC_BASE_PLPMTU; + l->pl.count = 0; + l->pl.probe_high = 0; spin_lock_bh(&l->wakeupq.lock); skb_queue_splice_init(&l->wakeupq, &list); diff --git a/net/tipc/link.h b/net/tipc/link.h index a16f401fdabd..30bee2562987 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -66,6 +66,26 @@ enum { TIPC_LINK_SND_STATE = (1 << 2) }; +/* PLPMTUD state + */ +enum { + TIPC_PL_DISABLED, + TIPC_PL_BASE, + TIPC_PL_SEARCH, + TIPC_PL_COMPLETE, + TIPC_PL_ERROR, +}; + +#define TIPC_BASE_PLPMTU 1200 +#define TIPC_MAX_PLPMTU 9000 +#define TIPC_MIN_PLPMTU 512 + +#define TIPC_MAX_PROBES 3 +#define TIPC_PROBE_INTERVAL 10 + +#define TIPC_PL_BIG_STEP 32 +#define TIPC_PL_MIN_STEP 4 + /* Starting value for maximum packet size negotiation on unicast links * (unless bearer MTU is less) */ -- 2.27.0 |
From: Xin L. <luc...@gm...> - 2021-07-06 18:22:29
|
This patchset implements PLPMTUD and GSO for TIPC. Patches 1-5 are for PLPMTUD, while patches 6-8 are for GSO. It borrows some ideas from SCTP, since the two protocols are similar: both carry reliable datagram packets and can run over IP(v6)/UDP. It also makes some adjustments specific to TIPC. Xin Long (8): tipc: set the mtu for bearer properly for udp media tipc: add the constants and variables for plpmtud tipc: build probe and its reply in tipc_link_build_proto_msg tipc: add probe send and state transition tipc: add probe recv and state transition tipc: add offload base tipc: add software gso tipc: add hardware gso include/uapi/linux/tipc_config.h | 6 -- net/tipc/Makefile | 2 +- net/tipc/bearer.c | 23 ++++- net/tipc/core.c | 3 + net/tipc/link.c | 147 +++++++++++++++++++++++++++---- net/tipc/link.h | 29 ++++++ net/tipc/msg.c | 1 + net/tipc/msg.h | 3 + net/tipc/node.c | 15 +++- net/tipc/offload.c | 70 +++++++++++++++ net/tipc/udp_media.c | 18 ++-- 11 files changed, 287 insertions(+), 30 deletions(-) create mode 100644 net/tipc/offload.c -- 2.27.0 |
From: Xin L. <luc...@gm...> - 2021-07-06 18:22:26
|
Instead of using 14000 for ipv4/udp mtu, and 1280 for ipv6/udp mtu, this patch to set mtu according to the lower device's mtu at the beginning. The pmtu will be determined by the PLPMTUD probe in the following patches. Signed-off-by: Xin Long <luc...@gm...> --- include/uapi/linux/tipc_config.h | 6 ------ net/tipc/udp_media.c | 8 ++++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 4dfc05651c98..7e23b7f438b4 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -185,12 +185,6 @@ #define TIPC_DEF_LINK_WIN 50 #define TIPC_MAX_LINK_WIN 8191 -/* - * Default MTU for UDP media - */ - -#define TIPC_DEF_LINK_UDP_MTU 14000 - struct tipc_node_info { __be32 addr; /* network address of node */ __be32 up; /* 0=down, 1= up */ diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index c2bb818704c8..dc4bae965549 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -661,7 +661,7 @@ int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, struct nlattr *attrs[]) { - int err = -EINVAL; + int err = -EINVAL, hlen; struct udp_bearer *ub; struct udp_media_addr remote = {0}; struct udp_media_addr local = {0}; @@ -743,7 +743,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, err = -EINVAL; goto err; } - b->mtu = b->media->mtu; + hlen = sizeof(struct iphdr); #if IS_ENABLED(CONFIG_IPV6) } else if (local.proto == htons(ETH_P_IPV6)) { dev = ub->ifindex ? 
__dev_get_by_index(net, ub->ifindex) : NULL; @@ -760,12 +760,13 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, else udp_conf.local_ip6 = local.ipv6; ub->ifindex = dev->ifindex; - b->mtu = 1280; + hlen = sizeof(struct ipv6hdr); #endif } else { err = -EAFNOSUPPORT; goto err; } + b->mtu = b->media->mtu ?: dev->mtu - hlen - sizeof(struct udphdr); udp_conf.local_udp_port = local.port; err = udp_sock_create(net, &udp_conf, &ub->ubsock); if (err) @@ -851,7 +852,6 @@ struct tipc_media udp_media_info = { .tolerance = TIPC_DEF_LINK_TOL, .min_win = TIPC_DEF_LINK_WIN, .max_win = TIPC_DEF_LINK_WIN, - .mtu = TIPC_DEF_LINK_UDP_MTU, .type_id = TIPC_MEDIA_TYPE_UDP, .hwaddr_len = 0, .name = "udp" -- 2.27.0 |
From: Jon M. <jm...@re...> - 2021-07-06 13:45:36
|
On 7/5/21 11:57 PM, maloy wrote: > > > > > Sent from my Galaxy > > > -------- Original message -------- > From: Xin Long <luc...@gm...> > Date: 2021-06-30 20:21 (GMT-05:00) > To: Jon Maloy <jm...@re...>, Erin Shepherd > <eri...@e4...>, tip...@li... > Subject: [tipc-discussion] [PATCHv2 net-next] tipc: keep the skb in > rcv queue until the whole data is read > > Currently, when userspace reads a datagram with a buffer that is > smaller than this datagram, the data will be truncated and only > part of it can be received by users. It doesn't seem right that > users don't know the datagram size and have to use a huge buffer > to read it to avoid the truncation. > > This patch to fix it by keeping the skb in rcv queue until the > whole data is read by users. Only the last msg of the datagram > will be marked with MSG_EOR, just as TCP/SCTP does. > > Note that this will work as above only when MSG_EOR is set in the > flags parameter of recvmsg(), so that it won't break any old user > applications. > > v1->v2: > - To enable this only when the flags with MSG_EOR is passed into > recvmsg() to fix the compatibility isssue as Erin noticed. 
> > Signed-off-by: Xin Long <luc...@gm...> > --- > net/tipc/socket.c | 36 +++++++++++++++++++++++++++--------- > 1 file changed, 27 insertions(+), 9 deletions(-) > > diff --git a/net/tipc/socket.c b/net/tipc/socket.c > index 34a97ea36cc8..9b0b311c7ec1 100644 > --- a/net/tipc/socket.c > +++ b/net/tipc/socket.c > @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, > struct msghdr *m, > bool connected = !tipc_sk_type_connectionless(sk); > struct tipc_sock *tsk = tipc_sk(sk); > int rc, err, hlen, dlen, copy; > + struct tipc_skb_cb *skb_cb; > struct sk_buff_head xmitq; > struct tipc_msg *hdr; > struct sk_buff *skb; > @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, > struct msghdr *m, > if (unlikely(rc)) > goto exit; > skb = skb_peek(&sk->sk_receive_queue); > + skb_cb = TIPC_SKB_CB(skb); > hdr = buf_msg(skb); > dlen = msg_data_sz(hdr); > hlen = msg_hdr_sz(hdr); > @@ -1922,18 +1924,33 @@ static int tipc_recvmsg(struct socket *sock, > struct msghdr *m, > > /* Capture data if non-error msg, otherwise just set return value */ > if (likely(!err)) { > - copy = min_t(int, dlen, buflen); > - if (unlikely(copy != dlen)) > - m->msg_flags |= MSG_TRUNC; > - rc = skb_copy_datagram_msg(skb, hlen, m, copy); > + int offset = skb_cb->bytes_read; > + > + copy = min_t(int, dlen - offset, buflen); > + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); > + if (unlikely(rc)) > + goto exit; > + if (unlikely(offset + copy < dlen)) { > + if (flags & MSG_EOR) { > + if (!(flags & MSG_PEEK)) > + skb_cb->bytes_read = offset + copy; > + } else { > + m->msg_flags |= MSG_TRUNC; > + skb_cb->bytes_read = 0; > + } > + } else { > + if (flags & MSG_EOR) > + m->msg_flags |= MSG_EOR; > + skb_cb->bytes_read = 0; > + } > } else { > copy = 0; > rc = 0; > - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) > + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { > rc = -ECONNRESET; > + goto exit; > + } > } > - if (unlikely(rc)) > - goto exit; > > /* 
Mark message as group event if applicable */ > if (unlikely(grp_evt)) { > @@ -1956,9 +1973,10 @@ static int tipc_recvmsg(struct socket *sock, > struct msghdr *m, > tipc_node_distr_xmit(sock_net(sk), &xmitq); > } > > - tsk_advance_rx_queue(sk); > + if (!skb_cb->bytes_read) > + tsk_advance_rx_queue(sk); > > - if (likely(!connected)) > + if (likely(!connected) || skb_cb->bytes_read) > goto exit; > > /* Send connection flow control advertisement when applicable */ > -- > 2.27.0 > > > Signed-off-by: Jon Maloy <jm...@re...> Acked-by, of course ;-) > _______________________________________________ > tipc-discussion mailing list > tip...@li... > https://lists.sourceforge.net/lists/listinfo/tipc-discussion |
From: maloy <ma...@do...> - 2021-07-06 04:28:29
|
Sent from my Galaxy -------- Original message --------From: Xin Long <luc...@gm...> Date: 2021-06-30 20:21 (GMT-05:00) To: Jon Maloy <jm...@re...>, Erin Shepherd <eri...@e4...>, tip...@li... Subject: [tipc-discussion] [PATCHv2 net-next] tipc: keep the skb in rcv queue until the whole data is read Currently, when userspace reads a datagram with a buffer that issmaller than this datagram, the data will be truncated and onlypart of it can be received by users. It doesn't seem right thatusers don't know the datagram size and have to use a huge bufferto read it to avoid the truncation.This patch to fix it by keeping the skb in rcv queue until thewhole data is read by users. Only the last msg of the datagramwill be marked with MSG_EOR, just as TCP/SCTP does.Note that this will work as above only when MSG_EOR is set in theflags parameter of recvmsg(), so that it won't break any old userapplications.v1->v2: - To enable this only when the flags with MSG_EOR is passed into recvmsg() to fix the compatibility isssue as Erin noticed.Signed-off-by: Xin Long <luc...@gm...>--- net/tipc/socket.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-)diff --git a/net/tipc/socket.c b/net/tipc/socket.cindex 34a97ea36cc8..9b0b311c7ec1 100644--- a/net/tipc/socket.c+++ b/net/tipc/socket.c@@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, bool connected = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy;+ struct tipc_skb_cb *skb_cb; struct sk_buff_head xmitq; struct tipc_msg *hdr; struct sk_buff *skb;@@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, if (unlikely(rc)) goto exit; skb = skb_peek(&sk->sk_receive_queue);+ skb_cb = TIPC_SKB_CB(skb); hdr = buf_msg(skb); dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr);@@ -1922,18 +1924,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, /* Capture data if non-error msg, 
otherwise just set return value */ if (likely(!err)) {- copy = min_t(int, dlen, buflen);- if (unlikely(copy != dlen))- m->msg_flags |= MSG_TRUNC;- rc = skb_copy_datagram_msg(skb, hlen, m, copy);+ int offset = skb_cb->bytes_read;++ copy = min_t(int, dlen - offset, buflen);+ rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);+ if (unlikely(rc))+ goto exit;+ if (unlikely(offset + copy < dlen)) {+ if (flags & MSG_EOR) {+ if (!(flags & MSG_PEEK))+ skb_cb->bytes_read = offset + copy;+ } else {+ m->msg_flags |= MSG_TRUNC;+ skb_cb->bytes_read = 0;+ }+ } else {+ if (flags & MSG_EOR)+ m->msg_flags |= MSG_EOR;+ skb_cb->bytes_read = 0;+ } } else { copy = 0; rc = 0;- if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)+ if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { rc = -ECONNRESET;+ goto exit;+ } }- if (unlikely(rc))- goto exit; /* Mark message as group event if applicable */ if (unlikely(grp_evt)) {@@ -1956,9 +1973,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, tipc_node_distr_xmit(sock_net(sk), &xmitq); } - tsk_advance_rx_queue(sk);+ if (!skb_cb->bytes_read)+ tsk_advance_rx_queue(sk); - if (likely(!connected))+ if (likely(!connected) || skb_cb->bytes_read) goto exit; /* Send connection flow control advertisement when applicable */-- 2.27.0Signed-off-by: Jon Maloy <jm...@re...>_______________________________________________tipc-discussion mailing lis...@li...https://lists.sourceforge.net/lists/listinfo/tipc-discussion |
From: Xin L. <luc...@gm...> - 2021-07-01 00:21:30
|
Currently, when userspace reads a datagram with a buffer that is smaller than this datagram, the data will be truncated and only part of it can be received by users. It doesn't seem right that users don't know the datagram size and have to use a huge buffer to read it to avoid the truncation. This patch to fix it by keeping the skb in rcv queue until the whole data is read by users. Only the last msg of the datagram will be marked with MSG_EOR, just as TCP/SCTP does. Note that this will work as above only when MSG_EOR is set in the flags parameter of recvmsg(), so that it won't break any old user applications. v1->v2: - To enable this only when the flags with MSG_EOR is passed into recvmsg() to fix the compatibility isssue as Erin noticed. Signed-off-by: Xin Long <luc...@gm...> --- net/tipc/socket.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 34a97ea36cc8..9b0b311c7ec1 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, bool connected = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy; + struct tipc_skb_cb *skb_cb; struct sk_buff_head xmitq; struct tipc_msg *hdr; struct sk_buff *skb; @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, if (unlikely(rc)) goto exit; skb = skb_peek(&sk->sk_receive_queue); + skb_cb = TIPC_SKB_CB(skb); hdr = buf_msg(skb); dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr); @@ -1922,18 +1924,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, /* Capture data if non-error msg, otherwise just set return value */ if (likely(!err)) { - copy = min_t(int, dlen, buflen); - if (unlikely(copy != dlen)) - m->msg_flags |= MSG_TRUNC; - rc = skb_copy_datagram_msg(skb, hlen, m, copy); + int offset = skb_cb->bytes_read; + + copy = min_t(int, dlen - offset, buflen); + 
rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); + if (unlikely(rc)) + goto exit; + if (unlikely(offset + copy < dlen)) { + if (flags & MSG_EOR) { + if (!(flags & MSG_PEEK)) + skb_cb->bytes_read = offset + copy; + } else { + m->msg_flags |= MSG_TRUNC; + skb_cb->bytes_read = 0; + } + } else { + if (flags & MSG_EOR) + m->msg_flags |= MSG_EOR; + skb_cb->bytes_read = 0; + } } else { copy = 0; rc = 0; - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { rc = -ECONNRESET; + goto exit; + } } - if (unlikely(rc)) - goto exit; /* Mark message as group event if applicable */ if (unlikely(grp_evt)) { @@ -1956,9 +1973,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, tipc_node_distr_xmit(sock_net(sk), &xmitq); } - tsk_advance_rx_queue(sk); + if (!skb_cb->bytes_read) + tsk_advance_rx_queue(sk); - if (likely(!connected)) + if (likely(!connected) || skb_cb->bytes_read) goto exit; /* Send connection flow control advertisement when applicable */ -- 2.27.0 |
From: Xin L. <luc...@gm...> - 2021-07-01 00:18:40
|
kernel-doc for TIPC is too simple, we need to add more information for it. This patch is to extend the abstract, and add the Features and Links items. Signed-off-by: Xin Long <luc...@gm...> Acked-by: Jon Maloy <jm...@re...> --- Documentation/networking/tipc.rst | 121 +++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/tipc.rst b/Documentation/networking/tipc.rst index 76775f24cdc8..ab63d298cca2 100644 --- a/Documentation/networking/tipc.rst +++ b/Documentation/networking/tipc.rst @@ -4,10 +4,125 @@ Linux Kernel TIPC ================= -TIPC (Transparent Inter Process Communication) is a protocol that is -specially designed for intra-cluster communication. +Introduction +============ -For more information about TIPC, see http://tipc.sourceforge.net. +TIPC (Transparent Inter Process Communication) is a protocol that is specially +designed for intra-cluster communication. It can be configured to transmit +messages either on UDP or directly across Ethernet. Message delivery is +sequence guaranteed, loss free and flow controlled. Latency times are shorter +than with any other known protocol, while maximal throughput is comparable to +that of TCP. + +TIPC Features +------------- + +- Cluster wide IPC service + + Have you ever wished you had the convenience of Unix Domain Sockets even when + transmitting data between cluster nodes? Where you yourself determine the + addresses you want to bind to and use? Where you don't have to perform DNS + lookups and worry about IP addresses? Where you don't have to start timers + to monitor the continuous existence of peer sockets? And yet without the + downsides of that socket type, such as the risk of lingering inodes? + + Welcome to the Transparent Inter Process Communication service, TIPC in short, + which gives you all of this, and a lot more. 
+ +- Service Addressing + + A fundamental concept in TIPC is that of Service Addressing which makes it + possible for a programmer to chose his own address, bind it to a server + socket and let client programs use only that address for sending messages. + +- Service Tracking + + A client wanting to wait for the availability of a server, uses the Service + Tracking mechanism to subscribe for binding and unbinding/close events for + sockets with the associated service address. + + The service tracking mechanism can also be used for Cluster Topology Tracking, + i.e., subscribing for availability/non-availability of cluster nodes. + + Likewise, the service tracking mechanism can be used for Cluster Connectivity + Tracking, i.e., subscribing for up/down events for individual links between + cluster nodes. + +- Transmission Modes + + Using a service address, a client can send datagram messages to a server socket. + + Using the same address type, it can establish a connection towards an accepting + server socket. + + It can also use a service address to create and join a Communication Group, + which is the TIPC manifestation of a brokerless message bus. + + Multicast with very good performance and scalability is available both in + datagram mode and in communication group mode. + +- Inter Node Links + + Communication between any two nodes in a cluster is maintained by one or two + Inter Node Links, which both guarantee data traffic integrity and monitor + the peer node's availability. + +- Cluster Scalability + + By applying the Overlapping Ring Monitoring algorithm on the inter node links + it is possible to scale TIPC clusters up to 1000 nodes with a maintained + neighbor failure discovery time of 1-2 seconds. For smaller clusters this + time can be made much shorter. + +- Neighbor Discovery + + Neighbor Node Discovery in the cluster is done by Ethernet broadcast or UDP + multicast, when any of those services are available. 
If not, configured peer + IP addresses can be used. + +- Configuration + + When running TIPC in single node mode no configuration whatsoever is needed. + When running in cluster mode TIPC must as a minimum be given a node address + (before Linux 4.17) and told which interface to attach to. The "tipc" + configuration tool makes is possible to add and maintain many more + configuration parameters. + +- Performance + + TIPC message transfer latency times are better than in any other known protocol. + Maximal byte throughput for inter-node connections is still somewhat lower than + for TCP, while they are superior for intra-node and inter-container throughput + on the same host. + +- Language Support + + The TIPC user API has support for C, Python, Perl, Ruby, D and Go. + +More Information +---------------- + +- How to set up TIPC: + + http://tipc.io/getting_started.html + +- How to program with TIPC: + + http://tipc.io/programming.html + +- How to contribute to TIPC: + +- http://tipc.io/contacts.html + +- More details about TIPC specification: + + http://tipc.io/protocol.html + + +Implementation +============== + +TIPC is implemented as a kernel module in net/tipc/ directory. TIPC Base Types --------------- -- 2.27.0 |
From: Xin L. <luc...@gm...> - 2021-06-30 15:45:16
|
On Wed, Jun 30, 2021 at 10:33 AM Jon Maloy <jm...@re...> wrote: > > > On 29/06/2021 17:41, Xin Long wrote: > > On Tue, Jun 29, 2021 at 3:57 PM Jon Maloy <jm...@re...> wrote: > >> > [...] > > Yes, Jon, I mean the opposite. > > > > when MSG_EOR is set, we will go with what this patch does, > > but to delete MSG_EOR if this is not the last part of the data, > > and keep MSG_EOR if this is the last part of the data. > > > > when MSG_EOR is not set, the msg will be truncated as before. > > Yes, that would be a safe behavior. Is SCTP doing this? No, SCTP doesn't need to, as it doesn't truncate msg since the beginning. That's why no compatibility issue was caused. > > ///jon > > > > >> In the first case, we don't solve any compatibility issue, if that is > >> the purpose. The programmer still has to add code to get the current > >> behavior. > >> > >> In the latter case we would be on the 100% safe side, although I have a > >> real hard time to see that this could be a real issue. Why would anybody > >> deliberately design an application for having messages truncated. > >> > >> ///jon > >> > >> > >>>> to indicate we don't want the truncating msg. > >>>> > >>>> When the msg flag returns with no MSG_EOR, it means there's more data to read. > >>>> > >>>> Thanks. 
> >>>>> - Erin > >>>>> > >>>>>> Signed-off-by: Xin Long <luc...@gm...> > >>>>>> --- > >>>>>> net/tipc/socket.c | 30 +++++++++++++++++++++--------- > >>>>>> 1 file changed, 21 insertions(+), 9 deletions(-) > >>>>>> > >>>>>> diff --git a/net/tipc/socket.c b/net/tipc/socket.c > >>>>>> index 34a97ea36cc8..504e59838b8b 100644 > >>>>>> --- a/net/tipc/socket.c > >>>>>> +++ b/net/tipc/socket.c > >>>>>> @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > >>>>>> bool connected = !tipc_sk_type_connectionless(sk); > >>>>>> struct tipc_sock *tsk = tipc_sk(sk); > >>>>>> int rc, err, hlen, dlen, copy; > >>>>>> + struct tipc_skb_cb *skb_cb; > >>>>>> struct sk_buff_head xmitq; > >>>>>> struct tipc_msg *hdr; > >>>>>> struct sk_buff *skb; > >>>>>> @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > >>>>>> if (unlikely(rc)) > >>>>>> goto exit; > >>>>>> skb = skb_peek(&sk->sk_receive_queue); > >>>>>> + skb_cb = TIPC_SKB_CB(skb); > >>>>>> hdr = buf_msg(skb); > >>>>>> dlen = msg_data_sz(hdr); > >>>>>> hlen = msg_hdr_sz(hdr); > >>>>>> @@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > >>>>>> > >>>>>> /* Capture data if non-error msg, otherwise just set return value */ > >>>>>> if (likely(!err)) { > >>>>>> - copy = min_t(int, dlen, buflen); > >>>>>> - if (unlikely(copy != dlen)) > >>>>>> - m->msg_flags |= MSG_TRUNC; > >>>>>> - rc = skb_copy_datagram_msg(skb, hlen, m, copy); > >>>>>> + int offset = skb_cb->bytes_read; > >>>>>> + > >>>>>> + copy = min_t(int, dlen - offset, buflen); > >>>>>> + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); > >>>>>> + if (unlikely(rc)) > >>>>>> + goto exit; > >>>>>> + if (unlikely(offset + copy < dlen)) { > >>>>>> + if (!(flags & MSG_PEEK)) > >>>>>> + skb_cb->bytes_read = offset + copy; > >>>>>> + } else { > >>>>>> + m->msg_flags |= MSG_EOR; > >>>>>> + skb_cb->bytes_read = 0; > >>>>>> + } > >>>>>> } else { > >>>>>> copy = 0; > >>>>>> rc = 0; > 
>>>>>> - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) > >>>>>> + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { > >>>>>> rc = -ECONNRESET; > >>>>>> + goto exit; > >>>>>> + } > >>>>>> } > >>>>>> - if (unlikely(rc)) > >>>>>> - goto exit; > >>>>>> > >>>>>> /* Mark message as group event if applicable */ > >>>>>> if (unlikely(grp_evt)) { > >>>>>> @@ -1956,9 +1967,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > >>>>>> tipc_node_distr_xmit(sock_net(sk), &xmitq); > >>>>>> } > >>>>>> > >>>>>> - tsk_advance_rx_queue(sk); > >>>>>> + if (!skb_cb->bytes_read) > >>>>>> + tsk_advance_rx_queue(sk); > >>>>>> > >>>>>> - if (likely(!connected)) > >>>>>> + if (likely(!connected) || skb_cb->bytes_read) > >>>>>> goto exit; > >>>>>> > >>>>>> /* Send connection flow control advertisement when applicable */ > >>>>>> -- > >>>>>> 2.27.0 > >>>>>> > >>>>>> > >>>>>> > >>>>>> _______________________________________________ > >>>>>> tipc-discussion mailing list > >>>>>> tip...@li... > >>>>>> https://lists.sourceforge.net/lists/listinfo/tipc-discussion > >>> _______________________________________________ > >>> tipc-discussion mailing list > >>> tip...@li... > >>> https://lists.sourceforge.net/lists/listinfo/tipc-discussion > >>> > |
From: Jon M. <jm...@re...> - 2021-06-30 14:44:47
|
On 29/06/2021 16:10, Erin Shepherd wrote: > Jon Maloy <jm...@re...> writes: >> In the latter case we would be on the 100% safe side, although I have a >> real hard time to see that this could be a real issue. Why would anybody >> deliberately design an application for having messages truncated. > My concern would be more around people using the new behavior on > unsuspecting programs to do "packet smuggling" attacks > > Let's say you have Program A sending messages to Program B which contain > a header followed by some (variable length) data which can be controlled > by a third party. Program B reads messages into a 1024B buffer. > > If I were a malicious attacker, I might try to craft some data for > Program B to send which places a packet header just to appear just after > the new split point. With the new behavior, Program B would think this > was a header (legitimately) crafted by Program A. It is entirely > plausible that this header contains identity/trust information which > shouldn't be controllable by external third parties > > With the existing behavior, Program B probably discards these overlong > packets either because it sees the truncated flag is set, or because > they are malformed. With the new behaviour, the first segment would > probably still get discarded, but follow up segments might look > plausible > > - Erin I think a sufficiently dedicated attacker can always find ways to inject packets when the links are not encrypted. But given Xin's new suggestion we should be as safe as we can be regarding this scenario. ///jon > |
From: Jon M. <jm...@re...> - 2021-06-30 14:33:28
|
On 29/06/2021 17:41, Xin Long wrote: > On Tue, Jun 29, 2021 at 3:57 PM Jon Maloy <jm...@re...> wrote: >> [...] > Yes, Jon, I mean the opposite. > > when MSG_EOR is set, we will go with what this patch does, > but to delete MSG_EOR if this is not the last part of the data, > and keep MSG_EOR if this is the last part of the data. > > when MSG_EOR is not set, the msg will be truncated as before. Yes, that would be a safe behavior. Is SCTP doing this? ///jon > >> In the first case, we don't solve any compatibility issue, if that is >> the purpose. The programmer still has to add code to get the current >> behavior. >> >> In the latter case we would be on the 100% safe side, although I have a >> real hard time to see that this could be a real issue. Why would anybody >> deliberately design an application for having messages truncated. >> >> ///jon >> >> >>>> to indicate we don't want the truncating msg. >>>> >>>> When the msg flag returns with no MSG_EOR, it means there's more data to read. >>>> >>>> Thanks. 
>>>>> - Erin >>>>> >>>>>> Signed-off-by: Xin Long <luc...@gm...> >>>>>> --- >>>>>> net/tipc/socket.c | 30 +++++++++++++++++++++--------- >>>>>> 1 file changed, 21 insertions(+), 9 deletions(-) >>>>>> >>>>>> diff --git a/net/tipc/socket.c b/net/tipc/socket.c >>>>>> index 34a97ea36cc8..504e59838b8b 100644 >>>>>> --- a/net/tipc/socket.c >>>>>> +++ b/net/tipc/socket.c >>>>>> @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, >>>>>> bool connected = !tipc_sk_type_connectionless(sk); >>>>>> struct tipc_sock *tsk = tipc_sk(sk); >>>>>> int rc, err, hlen, dlen, copy; >>>>>> + struct tipc_skb_cb *skb_cb; >>>>>> struct sk_buff_head xmitq; >>>>>> struct tipc_msg *hdr; >>>>>> struct sk_buff *skb; >>>>>> @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, >>>>>> if (unlikely(rc)) >>>>>> goto exit; >>>>>> skb = skb_peek(&sk->sk_receive_queue); >>>>>> + skb_cb = TIPC_SKB_CB(skb); >>>>>> hdr = buf_msg(skb); >>>>>> dlen = msg_data_sz(hdr); >>>>>> hlen = msg_hdr_sz(hdr); >>>>>> @@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, >>>>>> >>>>>> /* Capture data if non-error msg, otherwise just set return value */ >>>>>> if (likely(!err)) { >>>>>> - copy = min_t(int, dlen, buflen); >>>>>> - if (unlikely(copy != dlen)) >>>>>> - m->msg_flags |= MSG_TRUNC; >>>>>> - rc = skb_copy_datagram_msg(skb, hlen, m, copy); >>>>>> + int offset = skb_cb->bytes_read; >>>>>> + >>>>>> + copy = min_t(int, dlen - offset, buflen); >>>>>> + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); >>>>>> + if (unlikely(rc)) >>>>>> + goto exit; >>>>>> + if (unlikely(offset + copy < dlen)) { >>>>>> + if (!(flags & MSG_PEEK)) >>>>>> + skb_cb->bytes_read = offset + copy; >>>>>> + } else { >>>>>> + m->msg_flags |= MSG_EOR; >>>>>> + skb_cb->bytes_read = 0; >>>>>> + } >>>>>> } else { >>>>>> copy = 0; >>>>>> rc = 0; >>>>>> - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) >>>>>> + if (err != 
TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { >>>>>> rc = -ECONNRESET; >>>>>> + goto exit; >>>>>> + } >>>>>> } >>>>>> - if (unlikely(rc)) >>>>>> - goto exit; >>>>>> >>>>>> /* Mark message as group event if applicable */ >>>>>> if (unlikely(grp_evt)) { >>>>>> @@ -1956,9 +1967,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, >>>>>> tipc_node_distr_xmit(sock_net(sk), &xmitq); >>>>>> } >>>>>> >>>>>> - tsk_advance_rx_queue(sk); >>>>>> + if (!skb_cb->bytes_read) >>>>>> + tsk_advance_rx_queue(sk); >>>>>> >>>>>> - if (likely(!connected)) >>>>>> + if (likely(!connected) || skb_cb->bytes_read) >>>>>> goto exit; >>>>>> >>>>>> /* Send connection flow control advertisement when applicable */ >>>>>> -- >>>>>> 2.27.0 >>>>>> >>>>>> >>>>>> >>>>>> _______________________________________________ >>>>>> tipc-discussion mailing list >>>>>> tip...@li... >>>>>> https://lists.sourceforge.net/lists/listinfo/tipc-discussion >>> _______________________________________________ >>> tipc-discussion mailing list >>> tip...@li... >>> https://lists.sourceforge.net/lists/listinfo/tipc-discussion >>> |
From: Jon M. <jm...@re...> - 2021-06-30 12:09:05
|
On 11/06/2021 17:33, Xin Long wrote: > kernel-doc for TIPC is too simple, we need to add more information for it. > > This patch is to extend the abstract, and add the Features and Links items. > > Signed-off-by: Xin Long <luc...@gm...> Acked-by: Jon Maloy <jm...@re...> > --- > Documentation/networking/tipc.rst | 121 +++++++++++++++++++++++++++++- > 1 file changed, 118 insertions(+), 3 deletions(-) > > diff --git a/Documentation/networking/tipc.rst b/Documentation/networking/tipc.rst > index 76775f24cdc8..ab63d298cca2 100644 > --- a/Documentation/networking/tipc.rst > +++ b/Documentation/networking/tipc.rst > @@ -4,10 +4,125 @@ > Linux Kernel TIPC > ================= > > -TIPC (Transparent Inter Process Communication) is a protocol that is > -specially designed for intra-cluster communication. > +Introduction > +============ > > -For more information about TIPC, see http://tipc.sourceforge.net. > +TIPC (Transparent Inter Process Communication) is a protocol that is specially > +designed for intra-cluster communication. It can be configured to transmit > +messages either on UDP or directly across Ethernet. Message delivery is > +sequence guaranteed, loss free and flow controlled. Latency times are shorter > +than with any other known protocol, while maximal throughput is comparable to > +that of TCP. > + > +TIPC Features > +------------- > + > +- Cluster wide IPC service > + > + Have you ever wished you had the convenience of Unix Domain Sockets even when > + transmitting data between cluster nodes? Where you yourself determine the > + addresses you want to bind to and use? Where you don't have to perform DNS > + lookups and worry about IP addresses? Where you don't have to start timers > + to monitor the continuous existence of peer sockets? And yet without the > + downsides of that socket type, such as the risk of lingering inodes? 
> + > + Welcome to the Transparent Inter Process Communication service, TIPC in short, > + which gives you all of this, and a lot more. > + > +- Service Addressing > + > + A fundamental concept in TIPC is that of Service Addressing which makes it > + possible for a programmer to choose his own address, bind it to a server > + socket and let client programs use only that address for sending messages. > + > +- Service Tracking > + > + A client wanting to wait for the availability of a server, uses the Service > + Tracking mechanism to subscribe for binding and unbinding/close events for > + sockets with the associated service address. > + > + The service tracking mechanism can also be used for Cluster Topology Tracking, > + i.e., subscribing for availability/non-availability of cluster nodes. > + > + Likewise, the service tracking mechanism can be used for Cluster Connectivity > + Tracking, i.e., subscribing for up/down events for individual links between > + cluster nodes. > + > +- Transmission Modes > + > + Using a service address, a client can send datagram messages to a server socket. > + > + Using the same address type, it can establish a connection towards an accepting > + server socket. > + > + It can also use a service address to create and join a Communication Group, > + which is the TIPC manifestation of a brokerless message bus. > + > + Multicast with very good performance and scalability is available both in > + datagram mode and in communication group mode. > + > +- Inter Node Links > + > + Communication between any two nodes in a cluster is maintained by one or two > + Inter Node Links, which both guarantee data traffic integrity and monitor > + the peer node's availability. > + > +- Cluster Scalability > + > + By applying the Overlapping Ring Monitoring algorithm on the inter node links > + it is possible to scale TIPC clusters up to 1000 nodes with a maintained > + neighbor failure discovery time of 1-2 seconds. 
For smaller clusters this > + time can be made much shorter. > + > +- Neighbor Discovery > + > + Neighbor Node Discovery in the cluster is done by Ethernet broadcast or UDP > + multicast, when any of those services are available. If not, configured peer > + IP addresses can be used. > + > +- Configuration > + > + When running TIPC in single node mode no configuration whatsoever is needed. > + When running in cluster mode TIPC must as a minimum be given a node address > + (before Linux 4.17) and told which interface to attach to. The "tipc" > + configuration tool makes it possible to add and maintain many more > + configuration parameters. > + > +- Performance > + > + TIPC message transfer latency times are better than in any other known protocol. > + Maximal byte throughput for inter-node connections is still somewhat lower than > + for TCP, while they are superior for intra-node and inter-container throughput > + on the same host. > + > +- Language Support > + > + The TIPC user API has support for C, Python, Perl, Ruby, D and Go. > + > +More Information > +---------------- > + > +- How to set up TIPC: > + > + http://tipc.io/getting_started.html > + > +- How to program with TIPC: > + > + http://tipc.io/programming.html > + > +- How to contribute to TIPC: > + > + http://tipc.io/contacts.html > + > +- More details about TIPC specification: > + > + http://tipc.io/protocol.html > + > + > +Implementation > +============== > + > +TIPC is implemented as a kernel module in net/tipc/ directory. > > TIPC Base Types > --------------- |
From: Xin L. <luc...@gm...> - 2021-06-29 21:41:47
|
On Tue, Jun 29, 2021 at 3:57 PM Jon Maloy <jm...@re...> wrote: > > > On 28/06/2021 15:16, Xin Long wrote: > > On Mon, Jun 28, 2021 at 3:03 PM Xin Long <luc...@gm...> wrote: > >> On Sun, Jun 27, 2021 at 3:44 PM Erin Shepherd <eri...@e4...> wrote: > >>> Xin Long <luc...@gm...> writes: > >>> > >>>> Currently, when userspace reads a datagram with a buffer that is > >>>> smaller than this datagram, the data will be truncated and only > >>>> part of it can be received by users. It doesn't seem right that > >>>> users don't know the datagram size and have to use a huge buffer > >>>> to read it to avoid the truncation. > >>>> > >>>> This patch to fix it by keeping the skb in rcv queue until the > >>>> whole data is read by users. Only the last msg of the datagram > >>>> will be marked with MSG_EOR, just as TCP/SCTP does. > Makes sense to me. > >>> I agree that the current behavior is suboptimal, but: > >>> > >>> * Isn't this the same behavior that other datagram socket types > >>> exhibit? It seems like this would make TIPC behave inconsistently > >>> compared to other transports > >> Yes, SCTP. > >> Do you see any reliable datagram transports not doing this? > >> > >>> * Isn't this a compatibility break with existing software? Particularly > >>> existing software will not expect to receive trailers of overlong > >>> datagrams > >> I talked to Jon about this, he seems okay with this. > >> > >>> It feels like this behavior should be activated either with a > >>> setsockopt(2) call or a new MSG_* flag passed to recv > >> Anyway, It may not be worth a new sockopt. > >> I'm thinking to pass MSG_EOR into sendmsg: > >> sendmsg(MSG_EOR). > > sorry, I meant recvmsg(); > > Still not sure I understand what you are suggesting here. Do you mean > that if we add MSG_EOR as a flag to recvmsg() that means we *don't* > want the remainder of the message, i.e., it is ok to truncate it? > > Or do you mean the opposite? Yes, Jon, I mean the opposite. 
when MSG_EOR is set, we will go with what this patch does, but to delete MSG_EOR if this is not the last part of the data, and keep MSG_EOR if this is the last part of the data. when MSG_EOR is not set, the msg will be truncated as before. > > In the first case, we don't solve any compatibility issue, if that is > the purpose. The programmer still has to add code to get the current > behavior. > > In the latter case we would be on the 100% safe side, although I have a > real hard time to see that this could be a real issue. Why would anybody > deliberately design an application for having messages truncated. > > ///jon > > > >> to indicate we don't want the truncating msg. > >> > >> When the msg flag returns with no MSG_EOR, it means there's more data to read. > >> > >> Thanks. > >>> - Erin > >>> > >>>> Signed-off-by: Xin Long <luc...@gm...> > >>>> --- > >>>> net/tipc/socket.c | 30 +++++++++++++++++++++--------- > >>>> 1 file changed, 21 insertions(+), 9 deletions(-) > >>>> > >>>> diff --git a/net/tipc/socket.c b/net/tipc/socket.c > >>>> index 34a97ea36cc8..504e59838b8b 100644 > >>>> --- a/net/tipc/socket.c > >>>> +++ b/net/tipc/socket.c > >>>> @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > >>>> bool connected = !tipc_sk_type_connectionless(sk); > >>>> struct tipc_sock *tsk = tipc_sk(sk); > >>>> int rc, err, hlen, dlen, copy; > >>>> + struct tipc_skb_cb *skb_cb; > >>>> struct sk_buff_head xmitq; > >>>> struct tipc_msg *hdr; > >>>> struct sk_buff *skb; > >>>> @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > >>>> if (unlikely(rc)) > >>>> goto exit; > >>>> skb = skb_peek(&sk->sk_receive_queue); > >>>> + skb_cb = TIPC_SKB_CB(skb); > >>>> hdr = buf_msg(skb); > >>>> dlen = msg_data_sz(hdr); > >>>> hlen = msg_hdr_sz(hdr); > >>>> @@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > >>>> > >>>> /* Capture data if non-error msg, otherwise just set return value */ 
> >>>> if (likely(!err)) { > >>>> - copy = min_t(int, dlen, buflen); > >>>> - if (unlikely(copy != dlen)) > >>>> - m->msg_flags |= MSG_TRUNC; > >>>> - rc = skb_copy_datagram_msg(skb, hlen, m, copy); > >>>> + int offset = skb_cb->bytes_read; > >>>> + > >>>> + copy = min_t(int, dlen - offset, buflen); > >>>> + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); > >>>> + if (unlikely(rc)) > >>>> + goto exit; > >>>> + if (unlikely(offset + copy < dlen)) { > >>>> + if (!(flags & MSG_PEEK)) > >>>> + skb_cb->bytes_read = offset + copy; > >>>> + } else { > >>>> + m->msg_flags |= MSG_EOR; > >>>> + skb_cb->bytes_read = 0; > >>>> + } > >>>> } else { > >>>> copy = 0; > >>>> rc = 0; > >>>> - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) > >>>> + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { > >>>> rc = -ECONNRESET; > >>>> + goto exit; > >>>> + } > >>>> } > >>>> - if (unlikely(rc)) > >>>> - goto exit; > >>>> > >>>> /* Mark message as group event if applicable */ > >>>> if (unlikely(grp_evt)) { > >>>> @@ -1956,9 +1967,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > >>>> tipc_node_distr_xmit(sock_net(sk), &xmitq); > >>>> } > >>>> > >>>> - tsk_advance_rx_queue(sk); > >>>> + if (!skb_cb->bytes_read) > >>>> + tsk_advance_rx_queue(sk); > >>>> > >>>> - if (likely(!connected)) > >>>> + if (likely(!connected) || skb_cb->bytes_read) > >>>> goto exit; > >>>> > >>>> /* Send connection flow control advertisement when applicable */ > >>>> -- > >>>> 2.27.0 > >>>> > >>>> > >>>> > >>>> _______________________________________________ > >>>> tipc-discussion mailing list > >>>> tip...@li... > >>>> https://lists.sourceforge.net/lists/listinfo/tipc-discussion > > > > _______________________________________________ > > tipc-discussion mailing list > > tip...@li... > > https://lists.sourceforge.net/lists/listinfo/tipc-discussion > > > |
From: Jon M. <jm...@re...> - 2021-06-29 19:57:37
|
On 28/06/2021 15:16, Xin Long wrote: > On Mon, Jun 28, 2021 at 3:03 PM Xin Long <luc...@gm...> wrote: >> On Sun, Jun 27, 2021 at 3:44 PM Erin Shepherd <eri...@e4...> wrote: >>> Xin Long <luc...@gm...> writes: >>> >>>> Currently, when userspace reads a datagram with a buffer that is >>>> smaller than this datagram, the data will be truncated and only >>>> part of it can be received by users. It doesn't seem right that >>>> users don't know the datagram size and have to use a huge buffer >>>> to read it to avoid the truncation. >>>> >>>> This patch to fix it by keeping the skb in rcv queue until the >>>> whole data is read by users. Only the last msg of the datagram >>>> will be marked with MSG_EOR, just as TCP/SCTP does. Makes sense to me. >>> I agree that the current behavior is suboptimal, but: >>> >>> * Isn't this the same behavior that other datagram socket types >>> exhibit? It seems like this would make TIPC behave inconsistently >>> compared to other transports >> Yes, SCTP. >> Do you see any reliable datagram transports not doing this? >> >>> * Isn't this a compatibility break with existing software? Particularly >>> existing software will not expect to receive trailers of overlong >>> datagrams >> I talked to Jon about this, he seems okay with this. >> >>> It feels like this behavior should be activated either with a >>> setsockopt(2) call or a new MSG_* flag passed to recv >> Anyway, It may not be worth a new sockopt. >> I'm thinking to pass MSG_EOR into sendmsg: >> sendmsg(MSG_EOR). > sorry, I meant recvmsg(); Still not sure I understand what you are suggesting here. Do you mean that if we add MSG_EOR as a flag to recvmsg() that means we *don't* want the remainder of the message, i.e., it is ok to truncate it? Or do you mean the opposite? In the first case, we don't solve any compatibility issue, if that is the purpose. The programmer still has to add code to get the current behavior. 
In the latter case we would be on the 100% safe side, although I have a real hard time to see that this could be a real issue. Why would anybody deliberately design an application for having messages truncated. ///jon >> to indicate we don't want the truncating msg. >> >> When the msg flag returns with no MSG_EOR, it means there's more data to read. >> >> Thanks. >>> - Erin >>> >>>> Signed-off-by: Xin Long <luc...@gm...> >>>> --- >>>> net/tipc/socket.c | 30 +++++++++++++++++++++--------- >>>> 1 file changed, 21 insertions(+), 9 deletions(-) >>>> >>>> diff --git a/net/tipc/socket.c b/net/tipc/socket.c >>>> index 34a97ea36cc8..504e59838b8b 100644 >>>> --- a/net/tipc/socket.c >>>> +++ b/net/tipc/socket.c >>>> @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, >>>> bool connected = !tipc_sk_type_connectionless(sk); >>>> struct tipc_sock *tsk = tipc_sk(sk); >>>> int rc, err, hlen, dlen, copy; >>>> + struct tipc_skb_cb *skb_cb; >>>> struct sk_buff_head xmitq; >>>> struct tipc_msg *hdr; >>>> struct sk_buff *skb; >>>> @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, >>>> if (unlikely(rc)) >>>> goto exit; >>>> skb = skb_peek(&sk->sk_receive_queue); >>>> + skb_cb = TIPC_SKB_CB(skb); >>>> hdr = buf_msg(skb); >>>> dlen = msg_data_sz(hdr); >>>> hlen = msg_hdr_sz(hdr); >>>> @@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, >>>> >>>> /* Capture data if non-error msg, otherwise just set return value */ >>>> if (likely(!err)) { >>>> - copy = min_t(int, dlen, buflen); >>>> - if (unlikely(copy != dlen)) >>>> - m->msg_flags |= MSG_TRUNC; >>>> - rc = skb_copy_datagram_msg(skb, hlen, m, copy); >>>> + int offset = skb_cb->bytes_read; >>>> + >>>> + copy = min_t(int, dlen - offset, buflen); >>>> + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); >>>> + if (unlikely(rc)) >>>> + goto exit; >>>> + if (unlikely(offset + copy < dlen)) { >>>> + if (!(flags & MSG_PEEK)) >>>> + 
skb_cb->bytes_read = offset + copy; >>>> + } else { >>>> + m->msg_flags |= MSG_EOR; >>>> + skb_cb->bytes_read = 0; >>>> + } >>>> } else { >>>> copy = 0; >>>> rc = 0; >>>> - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) >>>> + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { >>>> rc = -ECONNRESET; >>>> + goto exit; >>>> + } >>>> } >>>> - if (unlikely(rc)) >>>> - goto exit; >>>> >>>> /* Mark message as group event if applicable */ >>>> if (unlikely(grp_evt)) { >>>> @@ -1956,9 +1967,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, >>>> tipc_node_distr_xmit(sock_net(sk), &xmitq); >>>> } >>>> >>>> - tsk_advance_rx_queue(sk); >>>> + if (!skb_cb->bytes_read) >>>> + tsk_advance_rx_queue(sk); >>>> >>>> - if (likely(!connected)) >>>> + if (likely(!connected) || skb_cb->bytes_read) >>>> goto exit; >>>> >>>> /* Send connection flow control advertisement when applicable */ >>>> -- >>>> 2.27.0 >>>> >>>> >>>> >>>> _______________________________________________ >>>> tipc-discussion mailing list >>>> tip...@li... >>>> https://lists.sourceforge.net/lists/listinfo/tipc-discussion > > _______________________________________________ > tipc-discussion mailing list > tip...@li... > https://lists.sourceforge.net/lists/listinfo/tipc-discussion > |
From: Xin L. <luc...@gm...> - 2021-06-28 19:16:37
|
On Mon, Jun 28, 2021 at 3:03 PM Xin Long <luc...@gm...> wrote: > > On Sun, Jun 27, 2021 at 3:44 PM Erin Shepherd <eri...@e4...> wrote: > > > > Xin Long <luc...@gm...> writes: > > > > > Currently, when userspace reads a datagram with a buffer that is > > > smaller than this datagram, the data will be truncated and only > > > part of it can be received by users. It doesn't seem right that > > > users don't know the datagram size and have to use a huge buffer > > > to read it to avoid the truncation. > > > > > > This patch to fix it by keeping the skb in rcv queue until the > > > whole data is read by users. Only the last msg of the datagram > > > will be marked with MSG_EOR, just as TCP/SCTP does. > > > > I agree that the current behavior is suboptimal, but: > > > > * Isn't this the same behavior that other datagram socket types > > exhibit? It seems like this would make TIPC behave inconsistently > > compared to other transports > Yes, SCTP. > Do you see any reliable datagram transports not doing this? > > > * Isn't this a compatibility break with existing software? Particularly > > existing software will not expect to receive trailers of overlong > > datagrams > I talked to Jon about this, he seems okay with this. > > > > > It feels like this behavior should be activated either with a > > setsockopt(2) call or a new MSG_* flag passed to recv > Anyway, It may not be worth a new sockopt. > I'm thinking to pass MSG_EOR into sendmsg: > sendmsg(MSG_EOR). sorry, I meant recvmsg(); > to indicate we don't want the truncating msg. > > When the msg flag returns with no MSG_EOR, it means there's more data to read. > > Thanks. 
> > > > - Erin > > > > > Signed-off-by: Xin Long <luc...@gm...> > > > --- > > > net/tipc/socket.c | 30 +++++++++++++++++++++--------- > > > 1 file changed, 21 insertions(+), 9 deletions(-) > > > > > > diff --git a/net/tipc/socket.c b/net/tipc/socket.c > > > index 34a97ea36cc8..504e59838b8b 100644 > > > --- a/net/tipc/socket.c > > > +++ b/net/tipc/socket.c > > > @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > > > bool connected = !tipc_sk_type_connectionless(sk); > > > struct tipc_sock *tsk = tipc_sk(sk); > > > int rc, err, hlen, dlen, copy; > > > + struct tipc_skb_cb *skb_cb; > > > struct sk_buff_head xmitq; > > > struct tipc_msg *hdr; > > > struct sk_buff *skb; > > > @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > > > if (unlikely(rc)) > > > goto exit; > > > skb = skb_peek(&sk->sk_receive_queue); > > > + skb_cb = TIPC_SKB_CB(skb); > > > hdr = buf_msg(skb); > > > dlen = msg_data_sz(hdr); > > > hlen = msg_hdr_sz(hdr); > > > @@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > > > > > > /* Capture data if non-error msg, otherwise just set return value */ > > > if (likely(!err)) { > > > - copy = min_t(int, dlen, buflen); > > > - if (unlikely(copy != dlen)) > > > - m->msg_flags |= MSG_TRUNC; > > > - rc = skb_copy_datagram_msg(skb, hlen, m, copy); > > > + int offset = skb_cb->bytes_read; > > > + > > > + copy = min_t(int, dlen - offset, buflen); > > > + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); > > > + if (unlikely(rc)) > > > + goto exit; > > > + if (unlikely(offset + copy < dlen)) { > > > + if (!(flags & MSG_PEEK)) > > > + skb_cb->bytes_read = offset + copy; > > > + } else { > > > + m->msg_flags |= MSG_EOR; > > > + skb_cb->bytes_read = 0; > > > + } > > > } else { > > > copy = 0; > > > rc = 0; > > > - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) > > > + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { > > > rc 
= -ECONNRESET; > > > + goto exit; > > > + } > > > } > > > - if (unlikely(rc)) > > > - goto exit; > > > > > > /* Mark message as group event if applicable */ > > > if (unlikely(grp_evt)) { > > > @@ -1956,9 +1967,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > > > tipc_node_distr_xmit(sock_net(sk), &xmitq); > > > } > > > > > > - tsk_advance_rx_queue(sk); > > > + if (!skb_cb->bytes_read) > > > + tsk_advance_rx_queue(sk); > > > > > > - if (likely(!connected)) > > > + if (likely(!connected) || skb_cb->bytes_read) > > > goto exit; > > > > > > /* Send connection flow control advertisement when applicable */ > > > -- > > > 2.27.0 > > > > > > > > > > > > _______________________________________________ > > > tipc-discussion mailing list > > > tip...@li... > > > https://lists.sourceforge.net/lists/listinfo/tipc-discussion |
From: Xin L. <luc...@gm...> - 2021-06-28 19:03:20
|
On Sun, Jun 27, 2021 at 3:44 PM Erin Shepherd <eri...@e4...> wrote: > > Xin Long <luc...@gm...> writes: > > > Currently, when userspace reads a datagram with a buffer that is > > smaller than this datagram, the data will be truncated and only > > part of it can be received by users. It doesn't seem right that > > users don't know the datagram size and have to use a huge buffer > > to read it to avoid the truncation. > > > > This patch to fix it by keeping the skb in rcv queue until the > > whole data is read by users. Only the last msg of the datagram > > will be marked with MSG_EOR, just as TCP/SCTP does. > > I agree that the current behavior is suboptimal, but: > > * Isn't this the same behavior that other datagram socket types > exhibit? It seems like this would make TIPC behave inconsistently > compared to other transports Yes, SCTP. Do you see any reliable datagram transports not doing this? > * Isn't this a compatibility break with existing software? Particularly > existing software will not expect to receive trailers of overlong > datagrams I talked to Jon about this, he seems okay with this. > > It feels like this behavior should be activated either with a > setsockopt(2) call or a new MSG_* flag passed to recv Anyway, It may not be worth a new sockopt. I'm thinking to pass MSG_EOR into sendmsg: sendmsg(MSG_EOR). to indicate we don't want the truncating msg. When the msg flag returns with no MSG_EOR, it means there's more data to read. Thanks. 
> > - Erin > > > Signed-off-by: Xin Long <luc...@gm...> > > --- > > net/tipc/socket.c | 30 +++++++++++++++++++++--------- > > 1 file changed, 21 insertions(+), 9 deletions(-) > > > > diff --git a/net/tipc/socket.c b/net/tipc/socket.c > > index 34a97ea36cc8..504e59838b8b 100644 > > --- a/net/tipc/socket.c > > +++ b/net/tipc/socket.c > > @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > > bool connected = !tipc_sk_type_connectionless(sk); > > struct tipc_sock *tsk = tipc_sk(sk); > > int rc, err, hlen, dlen, copy; > > + struct tipc_skb_cb *skb_cb; > > struct sk_buff_head xmitq; > > struct tipc_msg *hdr; > > struct sk_buff *skb; > > @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > > if (unlikely(rc)) > > goto exit; > > skb = skb_peek(&sk->sk_receive_queue); > > + skb_cb = TIPC_SKB_CB(skb); > > hdr = buf_msg(skb); > > dlen = msg_data_sz(hdr); > > hlen = msg_hdr_sz(hdr); > > @@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > > > > /* Capture data if non-error msg, otherwise just set return value */ > > if (likely(!err)) { > > - copy = min_t(int, dlen, buflen); > > - if (unlikely(copy != dlen)) > > - m->msg_flags |= MSG_TRUNC; > > - rc = skb_copy_datagram_msg(skb, hlen, m, copy); > > + int offset = skb_cb->bytes_read; > > + > > + copy = min_t(int, dlen - offset, buflen); > > + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); > > + if (unlikely(rc)) > > + goto exit; > > + if (unlikely(offset + copy < dlen)) { > > + if (!(flags & MSG_PEEK)) > > + skb_cb->bytes_read = offset + copy; > > + } else { > > + m->msg_flags |= MSG_EOR; > > + skb_cb->bytes_read = 0; > > + } > > } else { > > copy = 0; > > rc = 0; > > - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) > > + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { > > rc = -ECONNRESET; > > + goto exit; > > + } > > } > > - if (unlikely(rc)) > > - goto exit; > > > > /* Mark message 
as group event if applicable */ > > if (unlikely(grp_evt)) { > > @@ -1956,9 +1967,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, > > tipc_node_distr_xmit(sock_net(sk), &xmitq); > > } > > > > - tsk_advance_rx_queue(sk); > > + if (!skb_cb->bytes_read) > > + tsk_advance_rx_queue(sk); > > > > - if (likely(!connected)) > > + if (likely(!connected) || skb_cb->bytes_read) > > goto exit; > > > > /* Send connection flow control advertisement when applicable */ > > -- > > 2.27.0 > > > > > > > > _______________________________________________ > > tipc-discussion mailing list > > tip...@li... > > https://lists.sourceforge.net/lists/listinfo/tipc-discussion |
From: Xin L. <luc...@gm...> - 2021-06-26 03:40:34
|
Currently, when userspace reads a datagram with a buffer that is smaller than this datagram, the data will be truncated and only part of it can be received by users. It doesn't seem right that users don't know the datagram size and have to use a huge buffer to read it to avoid the truncation. This patch to fix it by keeping the skb in rcv queue until the whole data is read by users. Only the last msg of the datagram will be marked with MSG_EOR, just as TCP/SCTP does. Signed-off-by: Xin Long <luc...@gm...> --- net/tipc/socket.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 34a97ea36cc8..504e59838b8b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1880,6 +1880,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, bool connected = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy; + struct tipc_skb_cb *skb_cb; struct sk_buff_head xmitq; struct tipc_msg *hdr; struct sk_buff *skb; @@ -1903,6 +1904,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, if (unlikely(rc)) goto exit; skb = skb_peek(&sk->sk_receive_queue); + skb_cb = TIPC_SKB_CB(skb); hdr = buf_msg(skb); dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr); @@ -1922,18 +1924,27 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, /* Capture data if non-error msg, otherwise just set return value */ if (likely(!err)) { - copy = min_t(int, dlen, buflen); - if (unlikely(copy != dlen)) - m->msg_flags |= MSG_TRUNC; - rc = skb_copy_datagram_msg(skb, hlen, m, copy); + int offset = skb_cb->bytes_read; + + copy = min_t(int, dlen - offset, buflen); + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); + if (unlikely(rc)) + goto exit; + if (unlikely(offset + copy < dlen)) { + if (!(flags & MSG_PEEK)) + skb_cb->bytes_read = offset + copy; + } else { + m->msg_flags |= MSG_EOR; + skb_cb->bytes_read = 0; + } } else { copy = 0; rc 
= 0; - if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) + if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) { rc = -ECONNRESET; + goto exit; + } } - if (unlikely(rc)) - goto exit; /* Mark message as group event if applicable */ if (unlikely(grp_evt)) { @@ -1956,9 +1967,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, tipc_node_distr_xmit(sock_net(sk), &xmitq); } - tsk_advance_rx_queue(sk); + if (!skb_cb->bytes_read) + tsk_advance_rx_queue(sk); - if (likely(!connected)) + if (likely(!connected) || skb_cb->bytes_read) goto exit; /* Send connection flow control advertisement when applicable */ -- 2.27.0 |
From: Jon M. <jm...@re...> - 2021-06-18 12:49:00
|
On 6/18/21 2:57 AM, men...@gm... wrote: > From: Menglong Dong <don...@zt...> > > FB_MTU is used in 'tipc_msg_build()' to alloc smaller skb when memory > allocation fails, which can avoid unnecessary sending failures. > > The value of FB_MTU now is 3744, and the data size will be: > > (3744 + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + \ > SKB_DATA_ALIGN(BUF_HEADROOM + BUF_TAILROOM + 3)) > > which is larger than one page(4096), and two pages will be allocated. > > To avoid it, replace '3744' with a calculation: > > (PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) - \ > SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) > > What's more, alloc_skb_fclone() will call SKB_DATA_ALIGN for data size, > and it's not necessary to make alignment for buf_size in > tipc_buf_acquire(). So, just remove it. > > Fixes: 4c94cc2d3d57 ("tipc: fall back to smaller MTU if allocation of local send skb fails") > > Signed-off-by: Menglong Dong <don...@zt...> > --- > V4: > - fallback to V2 > > V3: > - split tipc_msg_build to tipc_msg_build and tipc_msg_frag > - introduce tipc_buf_acquire_flex, which is able to alloc skb for > local message > - add the variate 'local' in tipc_msg_build to check if this is a > local message. 
> > V2: > - define FB_MTU in msg.c instead of introduce a new file > - remove align for buf_size in tipc_buf_acquire() > --- > net/tipc/bcast.c | 2 +- > net/tipc/msg.c | 19 ++++++++++--------- > net/tipc/msg.h | 3 ++- > 3 files changed, 13 insertions(+), 11 deletions(-) > > diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c > index d4beca895992..593846d25214 100644 > --- a/net/tipc/bcast.c > +++ b/net/tipc/bcast.c > @@ -699,7 +699,7 @@ int tipc_bcast_init(struct net *net) > spin_lock_init(&tipc_net(net)->bclock); > > if (!tipc_link_bc_create(net, 0, 0, NULL, > - FB_MTU, > + one_page_mtu, > BCLINK_WIN_DEFAULT, > BCLINK_WIN_DEFAULT, > 0, > diff --git a/net/tipc/msg.c b/net/tipc/msg.c > index ce6ab54822d8..912d17b3fc01 100644 > --- a/net/tipc/msg.c > +++ b/net/tipc/msg.c > @@ -44,12 +44,17 @@ > #define MAX_FORWARD_SIZE 1024 > #ifdef CONFIG_TIPC_CRYPTO > #define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16) > -#define BUF_TAILROOM (TIPC_AES_GCM_TAG_SIZE) > +#define BUF_OVERHEAD (BUF_HEADROOM + TIPC_AES_GCM_TAG_SIZE) > #else > #define BUF_HEADROOM (LL_MAX_HEADER + 48) > -#define BUF_TAILROOM 16 > +#define BUF_OVERHEAD BUF_HEADROOM > #endif > > +#define ONE_PAGE_SKB_SZ (PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) - \ > + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) > + I suggest one small simplification: Let's just skip this macro completely, and assign the calculation directly to one_page_mtu below. 
Otherwise: Acked-by: Jon Maloy <jm...@re...> > +const int one_page_mtu = ONE_PAGE_SKB_SZ; > + > static unsigned int align(unsigned int i) > { > return (i + 3) & ~3u; > @@ -69,13 +74,8 @@ static unsigned int align(unsigned int i) > struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) > { > struct sk_buff *skb; > -#ifdef CONFIG_TIPC_CRYPTO > - unsigned int buf_size = (BUF_HEADROOM + size + BUF_TAILROOM + 3) & ~3u; > -#else > - unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; > -#endif > > - skb = alloc_skb_fclone(buf_size, gfp); > + skb = alloc_skb_fclone(BUF_OVERHEAD + size, gfp); > if (skb) { > skb_reserve(skb, BUF_HEADROOM); > skb_put(skb, size); > @@ -395,7 +395,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, > if (unlikely(!skb)) { > if (pktmax != MAX_MSG_SIZE) > return -ENOMEM; > - rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list); > + rc = tipc_msg_build(mhdr, m, offset, dsz, > + ONE_PAGE_SKB_SZ, list); > if (rc != dsz) > return rc; > if (tipc_msg_assemble(list)) > diff --git a/net/tipc/msg.h b/net/tipc/msg.h > index 5d64596ba987..64ae4c4c44f8 100644 > --- a/net/tipc/msg.h > +++ b/net/tipc/msg.h > @@ -99,9 +99,10 @@ struct plist; > #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ > > #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) > -#define FB_MTU 3744 > #define TIPC_MEDIA_INFO_OFFSET 5 > > +extern const int one_page_mtu; > + > struct tipc_skb_cb { > union { > struct { |
From: Jon M. <jm...@re...> - 2021-06-16 17:20:11
|
On 6/15/21 5:45 AM, men...@gm... wrote: > From: Menglong Dong <don...@zt...> > > FB_MTU is used in 'tipc_msg_build()' to alloc smaller skb when memory > allocation fails, which can avoid unnecessary sending failures. > > The value of FB_MTU now is 3744, and the data size will be: > > (3744 + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + \ > SKB_DATA_ALIGN(BUF_HEADROOM + BUF_TAILROOM + 3)) > > which is larger than one page(4096), and two pages will be allocated. > > To avoid it, replace '3744' with a calculation: > > FB_MTU=(PAGE_SIZE - \ > SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - \ > SKB_DATA_ALIGN(BUF_HEADROOM + BUF_TAILROOM + \ > EHDR_MAX_SIZE) \ > ) > > which is for crypto skb, and: > > FB_MTU_LOCAL=(PAGE_SIZE - SKB_DATA_ALIGN(BUF_HEADROOM) - \ > SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \ > ) > which is for local message. > > And BUF_HEADROOM is defined as non-crypto version. > > What's more, alloc_skb_fclone() will call SKB_DATA_ALIGN for data size, > and it's not necessary to make alignment for buf_size in > tipc_buf_acquire(). So, just remove it. > > Fixes: 4c94cc2d3d57 ("tipc: fall back to smaller MTU if allocation of local send skb fails") > > Signed-off-by: Menglong Dong <don...@zt...> > --- > V3: > - split tipc_msg_build to tipc_msg_build and tipc_msg_frag > - introduce tipc_buf_acquire_flex, which is able to alloc skb for > local message > - add the variate 'local' in tipc_msg_build to check if this is a > local message. 
> > V2: > - define FB_MTU in msg.c instead of introduce a new file > - remove align for buf_size in tipc_buf_acquire() > --- > net/tipc/bcast.c | 2 +- > net/tipc/msg.c | 168 +++++++++++++++++++++++++++++------------------ > net/tipc/msg.h | 3 +- > 3 files changed, 108 insertions(+), 65 deletions(-) > > diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c > index d4beca895992..9daace9542f4 100644 > --- a/net/tipc/bcast.c > +++ b/net/tipc/bcast.c > @@ -699,7 +699,7 @@ int tipc_bcast_init(struct net *net) > spin_lock_init(&tipc_net(net)->bclock); > > if (!tipc_link_bc_create(net, 0, 0, NULL, > - FB_MTU, > + fb_mtu, > BCLINK_WIN_DEFAULT, > BCLINK_WIN_DEFAULT, > 0, > diff --git a/net/tipc/msg.c b/net/tipc/msg.c > index ce6ab54822d8..349107e08d6f 100644 > --- a/net/tipc/msg.c > +++ b/net/tipc/msg.c > @@ -42,19 +42,46 @@ > #include "crypto.h" > > #define MAX_FORWARD_SIZE 1024 > +#define BUF_HEADROOM ALIGN(LL_MAX_HEADER + 48, 16) > + > #ifdef CONFIG_TIPC_CRYPTO > -#define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16) > -#define BUF_TAILROOM (TIPC_AES_GCM_TAG_SIZE) > +#define BUF_TAILROOM TIPC_AES_GCM_TAG_SIZE > #else > -#define BUF_HEADROOM (LL_MAX_HEADER + 48) > -#define BUF_TAILROOM 16 > +#define EHDR_MAX_SIZE 0 > +#define BUF_TAILROOM 0 > #endif > We need either a comment or a name that explains why we are doing all this, i.e., that we want a buffer that fits within one page. > +#define FB_MTU (PAGE_SIZE - \ > + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - \ > + SKB_DATA_ALIGN(BUF_HEADROOM + BUF_TAILROOM + \ > + EHDR_MAX_SIZE) \ > + ) > + > +#define FB_MTU_LOCAL (PAGE_SIZE - SKB_DATA_ALIGN(BUF_HEADROOM) - \ > + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \ > + ) > + > +const int fb_mtu = FB_MTU; > + > static unsigned int align(unsigned int i) > { > return (i + 3) & ~3u; > } This one should be completely replaced with the ALIGN macro, or maybe we should define our own ALIGN4() macro based on that, unless there already is one. 
But that should be a separate patch. > > +static inline struct sk_buff *tipc_alloc_skb(int headroom, int tailroom, > + int size, gfp_t gfp) > +{ > + struct sk_buff *skb; > + > + skb = alloc_skb_fclone(size + headroom + tailroom, gfp); > + if (skb) { > + skb_reserve(skb, headroom); > + skb_put(skb, size); > + skb->next = NULL; > + } > + return skb; > +} > + > /** > * tipc_buf_acquire - creates a TIPC message buffer > * @size: message size (including TIPC header) > @@ -68,20 +95,17 @@ static unsigned int align(unsigned int i) > */ > struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) > { > - struct sk_buff *skb; > -#ifdef CONFIG_TIPC_CRYPTO > - unsigned int buf_size = (BUF_HEADROOM + size + BUF_TAILROOM + 3) & ~3u; > -#else > - unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; > -#endif > + return tipc_alloc_skb(BUF_HEADROOM + EHDR_MAX_SIZE, > + BUF_TAILROOM, size, gfp); > +} > So far, so good. > - skb = alloc_skb_fclone(buf_size, gfp); > - if (skb) { > - skb_reserve(skb, BUF_HEADROOM); > - skb_put(skb, size); > - skb->next = NULL; > - } > - return skb > ; > +static struct sk_buff *tipc_buf_acquire_flex(u32 size, bool local, > + gfp_t gfp) I feel this function is overkill and way too intrusive for my comfort, especially for such a marginal fix this is meant to be. We sure may save a few bytes when allocating local buffers, but most buffers are not so close to a page limit that it will make any difference. And even if we happen to save a page now and then, it is not worth it. Stick to tipc_buf_acquire() where it is used now, so we don't have to change any more code. 
BR ///jon > +{ > + if (local) > + return tipc_alloc_skb(BUF_HEADROOM, 0, size, gfp); > + else > + return tipc_buf_acquire(size, gfp); > } > > void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type, > @@ -357,26 +381,12 @@ int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, > return -ENOMEM; > } > > -/** > - * tipc_msg_build - create buffer chain containing specified header and data > - * @mhdr: Message header, to be prepended to data > - * @m: User message > - * @offset: buffer offset for fragmented messages (FIXME) > - * @dsz: Total length of user data > - * @pktmax: Max packet size that can be used > - * @list: Buffer or chain of buffers to be returned to caller > - * > - * Note that the recursive call we are making here is safe, since it can > - * logically go only one further level down. > - * > - * Return: message data size or errno: -ENOMEM, -EFAULT > - */ > -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, > - int dsz, int pktmax, struct sk_buff_head *list) > +static int tipc_msg_frag(struct tipc_msg *mhdr, struct msghdr *m, int dsz, > + int pktmax, struct sk_buff_head *list, > + bool local) Same comment as above. > { > int mhsz = msg_hdr_sz(mhdr); > struct tipc_msg pkthdr; > - int msz = mhsz + dsz; > int pktrem = pktmax; > struct sk_buff *skb; > int drem = dsz; > @@ -385,33 +395,6 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, > int pktsz; > int rc; > > - msg_set_size(mhdr, msz); > - > - /* No fragmentation needed? 
*/ > - if (likely(msz <= pktmax)) { > - skb = tipc_buf_acquire(msz, GFP_KERNEL); > - > - /* Fall back to smaller MTU if node local message */ > - if (unlikely(!skb)) { > - if (pktmax != MAX_MSG_SIZE) > - return -ENOMEM; > - rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list); > - if (rc != dsz) > - return rc; > - if (tipc_msg_assemble(list)) > - return dsz; > - return -ENOMEM; > - } > - skb_orphan(skb); > - __skb_queue_tail(list, skb); > - skb_copy_to_linear_data(skb, mhdr, mhsz); > - pktpos = skb->data + mhsz; > - if (copy_from_iter_full(pktpos, dsz, &m->msg_iter)) > - return dsz; > - rc = -EFAULT; > - goto error; > - } > - > /* Prepare reusable fragment header */ > tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER, > FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr)); > @@ -420,7 +403,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, > msg_set_importance(&pkthdr, msg_importance(mhdr)); > > /* Prepare first fragment */ > - skb = tipc_buf_acquire(pktmax, GFP_KERNEL); > + skb = tipc_buf_acquire_flex(pktmax, local, GFP_KERNEL); > if (!skb) > return -ENOMEM; > skb_orphan(skb); > @@ -451,7 +434,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, > pktsz = drem + INT_H_SIZE; > else > pktsz = pktmax; > - skb = tipc_buf_acquire(pktsz, GFP_KERNEL); > + skb = tipc_buf_acquire_flex(pktsz, local, GFP_KERNEL); > if (!skb) { > rc = -ENOMEM; > goto error; > @@ -474,6 +457,65 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, > return rc; > } > > +/** > + * tipc_msg_build - create buffer chain containing specified header and data > + * @mhdr: Message header, to be prepended to data > + * @m: User message > + * @offset: buffer offset for fragmented messages (FIXME) > + * @dsz: Total length of user data > + * @pktmax: Max packet size that can be used > + * @list: Buffer or chain of buffers to be returned to caller > + * > + * Note that the recursive call we are making here is safe, since it can > + 
* logically go only one further level down. > + * > + * Return: message data size or errno: -ENOMEM, -EFAULT > + */ > +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, > + int dsz, int pktmax, struct sk_buff_head *list) > +{ > + int mhsz = msg_hdr_sz(mhdr); > + int msz = mhsz + dsz; > + struct sk_buff *skb; > + bool local = false; > + char *pktpos; > + int rc; > + > + msg_set_size(mhdr, msz); > + if (pktmax == MAX_MSG_SIZE) > + local = true; > + > + /* Fragmentation needed */ > + if (unlikely(msz <= pktmax)) > + return tipc_msg_frag(mhdr, m, dsz, pktmax, list, local); > + > + skb = tipc_buf_acquire_flex(msz, local, GFP_KERNEL); > + > + /* Fall back to smaller MTU if node local message */ > + if (unlikely(!skb)) > + goto try_frag; > + > + skb_orphan(skb); > + skb_copy_to_linear_data(skb, mhdr, mhsz); > + pktpos = skb->data + mhsz; > + if (copy_from_iter_full(pktpos, dsz, &m->msg_iter)) { > + __skb_queue_tail(list, skb); > + return dsz; > + } > + __skb_queue_head_init(list); > + return -EFAULT; > + > +try_frag: > + if (!local) > + return -ENOMEM; > + rc = tipc_msg_frag(mhdr, m, dsz, FB_MTU_LOCAL, list, true); > + if (rc != dsz) > + return rc; > + if (tipc_msg_assemble(list)) > + return dsz; > + return -ENOMEM; > +} > + > /** > * tipc_msg_bundle - Append contents of a buffer to tail of an existing one > * @bskb: the bundle buffer to append to > diff --git a/net/tipc/msg.h b/net/tipc/msg.h > index 5d64596ba987..2c214691037c 100644 > --- a/net/tipc/msg.h > +++ b/net/tipc/msg.h > @@ -99,9 +99,10 @@ struct plist; > #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ > > #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) > -#define FB_MTU 3744 > #define TIPC_MEDIA_INFO_OFFSET 5 > > +extern const int fb_mtu; > + > struct tipc_skb_cb { > union { > struct { |
From: Xin L. <luc...@gm...> - 2021-06-11 21:33:49
|
kernel-doc for TIPC is too simple, we need to add more information for it. This patch is to extend the abstract, and add the Features and Links items. Signed-off-by: Xin Long <luc...@gm...> --- Documentation/networking/tipc.rst | 121 +++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/tipc.rst b/Documentation/networking/tipc.rst index 76775f24cdc8..ab63d298cca2 100644 --- a/Documentation/networking/tipc.rst +++ b/Documentation/networking/tipc.rst @@ -4,10 +4,125 @@ Linux Kernel TIPC ================= -TIPC (Transparent Inter Process Communication) is a protocol that is -specially designed for intra-cluster communication. +Introduction +============ -For more information about TIPC, see http://tipc.sourceforge.net. +TIPC (Transparent Inter Process Communication) is a protocol that is specially +designed for intra-cluster communication. It can be configured to transmit +messages either on UDP or directly across Ethernet. Message delivery is +sequence guaranteed, loss free and flow controlled. Latency times are shorter +than with any other known protocol, while maximal throughput is comparable to +that of TCP. + +TIPC Features +------------- + +- Cluster wide IPC service + + Have you ever wished you had the convenience of Unix Domain Sockets even when + transmitting data between cluster nodes? Where you yourself determine the + addresses you want to bind to and use? Where you don't have to perform DNS + lookups and worry about IP addresses? Where you don't have to start timers + to monitor the continuous existence of peer sockets? And yet without the + downsides of that socket type, such as the risk of lingering inodes? + + Welcome to the Transparent Inter Process Communication service, TIPC in short, + which gives you all of this, and a lot more. 
+ +- Service Addressing + + A fundamental concept in TIPC is that of Service Addressing which makes it + possible for a programmer to choose his own address, bind it to a server + socket and let client programs use only that address for sending messages. + +- Service Tracking + + A client wanting to wait for the availability of a server, uses the Service + Tracking mechanism to subscribe for binding and unbinding/close events for + sockets with the associated service address. + + The service tracking mechanism can also be used for Cluster Topology Tracking, + i.e., subscribing for availability/non-availability of cluster nodes. + + Likewise, the service tracking mechanism can be used for Cluster Connectivity + Tracking, i.e., subscribing for up/down events for individual links between + cluster nodes. + +- Transmission Modes + + Using a service address, a client can send datagram messages to a server socket. + + Using the same address type, it can establish a connection towards an accepting + server socket. + + It can also use a service address to create and join a Communication Group, + which is the TIPC manifestation of a brokerless message bus. + + Multicast with very good performance and scalability is available both in + datagram mode and in communication group mode. + +- Inter Node Links + + Communication between any two nodes in a cluster is maintained by one or two + Inter Node Links, which both guarantee data traffic integrity and monitor + the peer node's availability. + +- Cluster Scalability + + By applying the Overlapping Ring Monitoring algorithm on the inter node links + it is possible to scale TIPC clusters up to 1000 nodes with a maintained + neighbor failure discovery time of 1-2 seconds. For smaller clusters this + time can be made much shorter. + +- Neighbor Discovery + + Neighbor Node Discovery in the cluster is done by Ethernet broadcast or UDP + multicast, when any of those services are available. 
If not, configured peer + IP addresses can be used. + +- Configuration + + When running TIPC in single node mode no configuration whatsoever is needed. + When running in cluster mode TIPC must as a minimum be given a node address + (before Linux 4.17) and told which interface to attach to. The "tipc" + configuration tool makes it possible to add and maintain many more + configuration parameters. + +- Performance + + TIPC message transfer latency times are better than in any other known protocol. + Maximal byte throughput for inter-node connections is still somewhat lower than + for TCP, while they are superior for intra-node and inter-container throughput + on the same host. + +- Language Support + + The TIPC user API has support for C, Python, Perl, Ruby, D and Go. + +More Information +---------------- + +- How to set up TIPC: + + http://tipc.io/getting_started.html + +- How to program with TIPC: + + http://tipc.io/programming.html + +- How to contribute to TIPC: + +- http://tipc.io/contacts.html + +- More details about TIPC specification: + + http://tipc.io/protocol.html + + +Implementation +============== + +TIPC is implemented as a kernel module in net/tipc/ directory. TIPC Base Types --------------- -- 2.27.0 |