From: Jon M. <jon...@er...> - 2019-07-22 14:43:43
|
Acked-by: Jon Maloy <jon...@er...> Remember to change the prefix to 'net-next' and send it when net-next re-opens. ///jon > -----Original Message----- > From: Tuong Lien <tuo...@de...> > Sent: 18-Jul-19 23:57 > To: tip...@li...; Jon Maloy > <jon...@er...>; ma...@do...; yin...@wi... > Subject: [net] tipc: fix false detection of retransmit failures > > This commit eliminates the use of the link 'stale_limit' & 'prev_from' > (besides the already removed - 'stale_cnt') variables in the detection of > repeated retransmit failures as there is no proper way to initialize them to > avoid a false detection, i.e. it is not really a retransmission failure but due to a > garbage values in the variables. > > Instead, a jiffies variable will be added to individual skbs (like the way we > restrict the skb retransmissions) in order to mark the first skb retransmit time. > Later on, at the next retransmissions, the timestamp will be checked to see if > the skb in the link transmq is "too stale", that is, the link tolerance time has > passed, so that a link reset will be ordered. Note, just checking on the first skb > in the queue is fine enough since it must be the oldest one. > A counter is also added to keep track the actual skb retransmissions' > number for later checking when the failure happens. > > The downside of this approach is that the skb->cb[] buffer is about to be > exhausted, however it is always able to allocate another memory area and > keep a reference to it when needed. > > Fixes: 77cf8edbc0e7 ("tipc: simplify stale link failure criteria") > Reported-by: Hoang Le <hoa...@de...> > Signed-off-by: Tuong Lien <tuo...@de...> > --- > net/tipc/link.c | 92 ++++++++++++++++++++++++++++++++--------------------- > ---- > net/tipc/msg.h | 8 +++-- > 2 files changed, 57 insertions(+), 43 deletions(-) > > diff --git a/net/tipc/link.c b/net/tipc/link.c index > 66d3a07bc571..c2c5c53cad22 100644 > --- a/net/tipc/link.c > +++ b/net/tipc/link.c > @@ -106,8 +106,6 @@ struct tipc_stats { > * @transmitq: queue for sent, non-acked messages > * @backlogq: queue for messages waiting to be sent > * @snt_nxt: next sequence number to use for outbound messages > - * @prev_from: sequence number of most previous retransmission request > - * @stale_limit: time when repeated identical retransmits must force link > reset > * @ackers: # of peers that needs to ack each packet before it can be released > * @acked: # last packet acked by a certain peer. Used for broadcast. > * @rcv_nxt: next sequence number to expect for inbound messages @@ - > 164,9 +162,7 @@ struct tipc_link { > u16 limit; > } backlog[5]; > u16 snd_nxt; > - u16 prev_from; > u16 window; > - unsigned long stale_limit; > > /* Reception */ > u16 rcv_nxt; > @@ -1044,47 +1040,53 @@ static void tipc_link_advance_backlog(struct > tipc_link *l, > * link_retransmit_failure() - Detect repeated retransmit failures > * @l: tipc link sender > * @r: tipc link receiver (= l in case of unicast) > - * @from: seqno of the 1st packet in retransmit request > * @rc: returned code > * > * Return: true if the repeated retransmit failures happens, otherwise > * false > */ > static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, > - u16 from, int *rc) > + int *rc) > { > struct sk_buff *skb = skb_peek(&l->transmq); > struct tipc_msg *hdr; > > if (!skb) > return false; > - hdr = buf_msg(skb); > > - /* Detect repeated retransmit failures on same packet */ > - if (r->prev_from != from) { > - r->prev_from = from; > - r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); > - } else if (time_after(jiffies, r->stale_limit)) { > - pr_warn("Retransmission failure on link <%s>\n", l->name); > - link_print(l, "State of link "); > - pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", > - msg_user(hdr), msg_type(hdr), msg_size(hdr), > - msg_errcode(hdr)); > - pr_info("sqno %u, prev: %x, src: %x\n", > - msg_seqno(hdr), msg_prevnode(hdr), > msg_orignode(hdr)); > - > - trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); > - trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); > - trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); > + if (!TIPC_SKB_CB(skb)->retr_cnt) > + return false; > > - if (link_is_bc_sndlink(l)) > - *rc = TIPC_LINK_DOWN_EVT; > + if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + > + msecs_to_jiffies(r->tolerance))) > + return false; > + > + hdr = buf_msg(skb); > + if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr))) > + return false; > > + pr_warn("Retransmission failure on link <%s>\n", l->name); > + link_print(l, "State of link "); > + pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", > + msg_user(hdr), msg_type(hdr), msg_size(hdr), > msg_errcode(hdr)); > + pr_info("sqno %u, prev: %x, dest: %x\n", > + msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr)); > + pr_info("retr_stamp %d, retr_cnt %d\n", > + jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp), > + TIPC_SKB_CB(skb)->retr_cnt); > + > + trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); > + trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); > + trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); > + > + if (link_is_bc_sndlink(l)) { > + r->state = LINK_RESET; > + *rc = TIPC_LINK_DOWN_EVT; > + } else { > *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); > - return true; > } > > - return false; > + return true; > } > > /* tipc_link_bc_retrans() - retransmit zero or more packets @@ -1110,7 > +1112,7 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link > *r, > > trace_tipc_link_retrans(r, from, to, &l->transmq); > > - if (link_retransmit_failure(l, r, from, &rc)) > + if (link_retransmit_failure(l, r, &rc)) > return rc; > > skb_queue_walk(&l->transmq, skb) { > @@ -1119,11 +1121,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l, > struct tipc_link *r, > continue; > if (more(msg_seqno(hdr), to)) > break; > - if (link_is_bc_sndlink(l)) { > - if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) > - continue; > - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; > - } > + > + if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) > + continue; > + TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; > _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, > GFP_ATOMIC); > if (!_skb) > return 0; > @@ -1133,6 +1134,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l, > struct tipc_link *r, > _skb->priority = TC_PRIO_CONTROL; > __skb_queue_tail(xmitq, _skb); > l->stats.retransmitted++; > + > + /* Increase actual retrans counter & mark first time */ > + if (!TIPC_SKB_CB(skb)->retr_cnt++) > + TIPC_SKB_CB(skb)->retr_stamp = jiffies; > } > return 0; > } > @@ -1357,12 +1362,10 @@ static int tipc_link_advance_transmq(struct > tipc_link *l, u16 acked, u16 gap, > struct tipc_msg *hdr; > u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; > u16 ack = l->rcv_nxt - 1; > + bool passed = false; > u16 seqno, n = 0; > int rc = 0; > > - if (gap && link_retransmit_failure(l, l, acked + 1, &rc)) > - return rc; > - > skb_queue_walk_safe(&l->transmq, skb, tmp) { > seqno = buf_seqno(skb); > > @@ -1372,12 +1375,17 @@ static int tipc_link_advance_transmq(struct > tipc_link *l, u16 acked, u16 gap, > __skb_unlink(skb, &l->transmq); > kfree_skb(skb); > } else if (less_eq(seqno, acked + gap)) { > - /* retransmit skb */ > + /* First, check if repeated retrans failures occurs? */ > + if (!passed && link_retransmit_failure(l, l, &rc)) > + return rc; > + passed = true; > + > + /* retransmit skb if unrestricted*/ > if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) > continue; > TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; > - > - _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); > + _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, > + GFP_ATOMIC); > if (!_skb) > continue; > hdr = buf_msg(_skb); > @@ -1386,6 +1394,10 @@ static int tipc_link_advance_transmq(struct > tipc_link *l, u16 acked, u16 gap, > _skb->priority = TC_PRIO_CONTROL; > __skb_queue_tail(xmitq, _skb); > l->stats.retransmitted++; > + > + /* Increase actual retrans counter & mark first time */ > + if (!TIPC_SKB_CB(skb)->retr_cnt++) > + TIPC_SKB_CB(skb)->retr_stamp = jiffies; > } else { > /* retry with Gap ACK blocks if any */ > if (!ga || n >= ga->gack_cnt) > @@ -2577,7 +2589,7 @@ int tipc_link_dump(struct tipc_link *l, u16 > dqueues, char *buf) > i += scnprintf(buf + i, sz - i, " %x", l->peer_caps); > i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt); > i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt); > - i += scnprintf(buf + i, sz - i, " %u", l->prev_from); > + i += scnprintf(buf + i, sz - i, " %u", 0); > i += scnprintf(buf + i, sz - i, " %u", 0); > i += scnprintf(buf + i, sz - i, " %u", l->acked); > > diff --git a/net/tipc/msg.h b/net/tipc/msg.h index > da509f0eb9ca..d7ebc9e955f6 100644 > --- a/net/tipc/msg.h > +++ b/net/tipc/msg.h > @@ -102,13 +102,15 @@ struct plist; > #define TIPC_MEDIA_INFO_OFFSET 5 > > struct tipc_skb_cb { > - u32 bytes_read; > - u32 orig_member; > struct sk_buff *tail; > unsigned long nxt_retr; > - bool validated; > + unsigned long retr_stamp; > + u32 bytes_read; > + u32 orig_member; > u16 chain_imp; > u16 ackers; > + u16 retr_cnt; > + bool validated; > }; > > #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) > -- > 2.13.7 |