From: Ying X. <yin...@wi...> - 2017-03-09 14:22:20
|
After a subscription object is created, it's inserted into its subscriber subscrp_list list under subscriber lock protection, similarly, before it's destroyed, it should be first removed from its subscriber->subscrp_list. Signed-off-by: Ying Xue <yin...@wi...> Reviewed-by: Jon Maloy <jon...@er...> --- net/tipc/subscr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 9d94e65..e70e7ba 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -174,7 +174,6 @@ static void tipc_subscrp_kref_release(struct kref *kref) spin_lock_bh(&subscriber->lock); tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscrp_list); atomic_dec(&tn->subscription_count); spin_unlock_bh(&subscriber->lock); kfree(sub); @@ -205,6 +204,8 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) continue; + list_del(&sub->subscrp_list); + tipc_subscrp_get(sub); spin_unlock_bh(&subscriber->lock); tipc_subscrp_delete(sub); @@ -305,6 +306,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, spin_lock_bh(&subscriber->lock); list_add(&sub->subscrp_list, &subscriber->subscrp_list); + sub->subscriber = subscriber; tipc_nametbl_subscribe(sub); tipc_subscrb_get(subscriber); -- 2.7.4 |
From: Ying X. <yin...@wi...> - 2017-03-09 14:22:20
|
When a new subscription object is inserted into name_seq->subscriptions list, it's under name_seq->lock protection; when a subscription is deleted from the list, it's also under the same lock protection; similarly, when accessing a subscription by going through subscriptions list, the entire process is also protected by the name_seq->lock. Therefore, if subscription refcount is increased before it's inserted into subscriptions list, and its refcount is decreased after it's deleted from the list, it will be unnecessary to hold refcount at all before accessing subscription object which is obtained by going through subscriptions list under name_seq->lock protection. Signed-off-by: Ying Xue <yin...@wi...> Reviewed-by: Jon Maloy <jon...@er...> --- net/tipc/name_table.c | 2 ++ net/tipc/subscr.c | 8 ++------ net/tipc/subscr.h | 3 +++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 9be6592..bd0aac8 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -416,6 +416,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); + tipc_subscrp_get(s); list_add(&s->nameseq_list, &nseq->subscriptions); if (!sseq) @@ -787,6 +788,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); + tipc_subscrp_put(s); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index e70e7ba..111d33c6 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,8 +54,6 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); -static void tipc_subscrp_put(struct tipc_subscription *subscription); -static void tipc_subscrp_get(struct tipc_subscription *subscription); /** * htohl - convert value to endianness used by destination @@ -125,7 +123,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, { struct tipc_name_seq seq; - tipc_subscrp_get(sub); tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; @@ -135,7 +132,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, node); - tipc_subscrp_put(sub); } static void tipc_subscrp_timeout(unsigned long data) @@ -180,12 +176,12 @@ static void tipc_subscrp_kref_release(struct kref *kref) tipc_subscrb_put(subscriber); } -static void tipc_subscrp_put(struct tipc_subscription *subscription) +void tipc_subscrp_put(struct tipc_subscription *subscription) { kref_put(&subscription->kref, tipc_subscrp_kref_release); } -static void tipc_subscrp_get(struct tipc_subscription *subscription) +void tipc_subscrp_get(struct tipc_subscription *subscription) { kref_get(&subscription->kref); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ffdc214..ee52957 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -78,4 +78,7 @@ u32 tipc_subscrp_convert_seq_type(u32 type, int swap); int tipc_topsrv_start(struct net *net); void tipc_topsrv_stop(struct net *net); +void tipc_subscrp_put(struct tipc_subscription *subscription); +void tipc_subscrp_get(struct tipc_subscription *subscription); + #endif -- 2.7.4 |
[tipc-discussion] [PATCH v2 net-next 4/6] tipc: advance the time of
calling tipc_nametbl_unsubscribe
From: Ying X. <yin...@wi...> - 2017-03-09 14:22:23
|
When subscription is inserted into subscriber->subscrp_list, the subscription is also inserted to name sequence subscriptions list. Now when subscription is deleted from subscriber->subscrp_list in tipc_subscrb_subscrp_delete(), it's also a proper time to delete the subscription from name sequence subscriptions list too. Signed-off-by: Ying Xue <yin...@wi...> Reviewed-by: Jon Maloy <jon...@er...> --- net/tipc/subscr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index ffd7b9d..6624232 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -169,7 +169,6 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(&subscriber->lock); - tipc_nametbl_unsubscribe(sub); atomic_dec(&tn->subscription_count); spin_unlock_bh(&subscriber->lock); kfree(sub); @@ -202,6 +201,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, list_del(&sub->subscrp_list); + tipc_nametbl_unsubscribe(sub); tipc_subscrp_get(sub); spin_unlock_bh(&subscriber->lock); tipc_subscrp_delete(sub); -- 2.7.4 |
From: Ying X. <yin...@wi...> - 2017-03-09 14:22:24
|
After the design policy of subscription timeout is adjusted, subscription is still present in name table after it's expired, and hence the subscriber will receive new events for this subscription until the subscriber terminates. Now when subscription is expired, it will be first deleted from name table and its subscriber, and then schedule its work. Finally the subscription will be destroyed in workqueue asynchronously. Signed-off-by: Ying Xue <yin...@wi...> --- net/tipc/server.h | 2 ++ net/tipc/subscr.c | 32 ++++++++++++++++++++++++++++++++ net/tipc/subscr.h | 1 + 3 files changed, 35 insertions(+) diff --git a/net/tipc/server.h b/net/tipc/server.h index 34f8055..7d5588f 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -59,6 +59,7 @@ * @name: server name * @imp: message importance * @type: socket type + * @wq: workqueue */ struct tipc_server { struct idr conn_idr; @@ -78,6 +79,7 @@ struct tipc_server { char name[TIPC_SERVER_NAME_LEN]; int imp; int type; + struct workqueue_struct *wq; }; int tipc_conn_sendmsg(struct tipc_server *s, int conid, diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index ea67cce..3dcb009 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -137,14 +137,34 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscriber *subscriber = sub->subscriber; + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, TIPC_SUBSCR_TIMEOUT, 0, 0); + spin_lock_bh(&subscriber->lock); + list_del(&sub->subscrp_list); + tipc_nametbl_unsubscribe(sub); + spin_unlock_bh(&subscriber->lock); + + queue_work(tn->topsrv->wq, &sub->work); + tipc_subscrp_put(sub); } +static void tipc_subscrp_work(struct work_struct *work) +{ + struct tipc_subscription *sub = container_of(work, + struct tipc_subscription, work); + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(&subscriber->lock); + tipc_subscrp_delete(sub); + spin_unlock_bh(&subscriber->lock); +} + static void tipc_subscrb_kref_release(struct kref *kref) { kfree(container_of(kref,struct tipc_subscriber, kref)); @@ -282,6 +302,9 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net, memcpy(&sub->evt.s, s, sizeof(*s)); atomic_inc(&tn->subscription_count); kref_init(&sub->kref); + + INIT_WORK(&sub->work, tipc_subscrp_work); + return sub; } @@ -379,6 +402,13 @@ int tipc_topsrv_start(struct net *net) topsrv->tipc_conn_release = tipc_subscrb_release_cb; strncpy(topsrv->name, name, strlen(name) + 1); + topsrv->wq = create_singlethread_workqueue(topsrv->name); + if (!topsrv->wq) { + kfree(saddr); + kfree(topsrv); + return -ENOMEM; + } + tn->topsrv = topsrv; atomic_set(&tn->subscription_count, 0); @@ -391,6 +421,8 @@ void tipc_topsrv_stop(struct net *net) struct tipc_server *topsrv = tn->topsrv; tipc_server_stop(topsrv); + flush_workqueue(topsrv->wq); + destroy_workqueue(topsrv->wq); kfree(topsrv->saddr); kfree(topsrv); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ee52957..50fc64b 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -65,6 +65,7 @@ struct tipc_subscription { struct list_head subscrp_list; int swap; struct tipc_event evt; + struct work_struct work; }; int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, -- 2.7.4 |
From: Ying X. <yin...@wi...> - 2017-03-09 14:22:24
|
As the policy of holding subscription in subscriber time has been adjusted, it's unnecessary to hold subscription refcount before tipc_subscrp_delete() is called. As a consequence, subscriber->lock can be safely removed to avoid the following two deadlock scenarios: CPU1: CPU2: ---------- ---------------- tipc_nametbl_publish spin_lock_bh(&tn->nametbl_lock) tipc_nametbl_insert_publ tipc_nameseq_insert_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <<grab nametbl_lock again>> CPU1: CPU2: ---------- ---------------- tipc_nametbl_stop spin_lock_bh(&tn->nametbl_lock) tipc_purge_publications tipc_nameseq_remove_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <<grab nametbl_lock again>> Reported-by: John Thompson <tho...@gm...> Reported-by: Jon Maloy <jon...@er...> Signed-off-by: Ying Xue <yin...@wi...> Reviewed-by: Jon Maloy <jon...@er...> --- net/tipc/subscr.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 6624232..ea67cce 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -168,9 +168,7 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; - spin_lock_bh(&subscriber->lock); atomic_dec(&tn->subscription_count); - spin_unlock_bh(&subscriber->lock); kfree(sub); tipc_subscrb_put(subscriber); } @@ -202,11 +200,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, list_del(&sub->subscrp_list); tipc_nametbl_unsubscribe(sub); - tipc_subscrp_get(sub); - spin_unlock_bh(&subscriber->lock); tipc_subscrp_delete(sub); - tipc_subscrp_put(sub); - spin_lock_bh(&subscriber->lock); if (s) break; -- 2.7.4 |
From: Ying X. <yin...@wi...> - 2017-03-09 14:37:29
|
To be honest, I don't very like this solution. But I cannot think one solution is better than this one although I tried at least the following two methods to solve the issue. - Introduce one timeout list to subscriber. When subscription is expired, it will be moved to its timeout list. When name service is deleted or inserted, we are going to release all subscriptions linked in timeout list. After I tried to do the experiment, it's found we have to make a big change for current code; - In subscription's timeout handler, when subsription timeout event is sent to server through tipc_subscrp_send_event(), we can add a new flag in the message. When server sends the timeout message to user space send workqueue, it can send back a TIPC_SUB_CANCEL message to subscriber through tipc_subscrb_rcv_cb() if that new flag is set. When the TIPC_SUB_CANCEL message arrives at subscriber, the expired subscription can be deleted at the moment. The approach is still infeasible due to the same reason as previous one. In all, this solution is a bit elegant compared to above two. If you have any suggestion, please let me know. Thanks, Ying On 03/09/2017 10:21 PM, Ying Xue wrote: > After the design policy of subscription timeout is adjusted, > subscription is still present in name table after it's expired, and > hence the subscriber will receive new events for this subscription > until the subscriber terminates. > > Now when subscription is expired, it will be first deleted from name > table and its subscriber, and then schedule its work. Finally the > subscription will be destroyed in workqueue asynchronously. > > Signed-off-by: Ying Xue <yin...@wi...> > --- > net/tipc/server.h | 2 ++ > net/tipc/subscr.c | 32 ++++++++++++++++++++++++++++++++ > net/tipc/subscr.h | 1 + > 3 files changed, 35 insertions(+) > > diff --git a/net/tipc/server.h b/net/tipc/server.h > index 34f8055..7d5588f 100644 > --- a/net/tipc/server.h > +++ b/net/tipc/server.h > @@ -59,6 +59,7 @@ > * @name: server name > * @imp: message importance > * @type: socket type > + * @wq: workqueue > */ > struct tipc_server { > struct idr conn_idr; > @@ -78,6 +79,7 @@ struct tipc_server { > char name[TIPC_SERVER_NAME_LEN]; > int imp; > int type; > + struct workqueue_struct *wq; > }; > > int tipc_conn_sendmsg(struct tipc_server *s, int conid, > diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c > index ea67cce..3dcb009 100644 > --- a/net/tipc/subscr.c > +++ b/net/tipc/subscr.c > @@ -137,14 +137,34 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, > static void tipc_subscrp_timeout(unsigned long data) > { > struct tipc_subscription *sub = (struct tipc_subscription *)data; > + struct tipc_subscriber *subscriber = sub->subscriber; > + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); > > /* Notify subscriber of timeout */ > tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, > TIPC_SUBSCR_TIMEOUT, 0, 0); > > + spin_lock_bh(&subscriber->lock); > + list_del(&sub->subscrp_list); > + tipc_nametbl_unsubscribe(sub); > + spin_unlock_bh(&subscriber->lock); > + > + queue_work(tn->topsrv->wq, &sub->work); > + > tipc_subscrp_put(sub); > } > > +static void tipc_subscrp_work(struct work_struct *work) > +{ > + struct tipc_subscription *sub = container_of(work, > + struct tipc_subscription, work); > + struct tipc_subscriber *subscriber = sub->subscriber; > + > + spin_lock_bh(&subscriber->lock); > + tipc_subscrp_delete(sub); > + spin_unlock_bh(&subscriber->lock); > +} > + > static void tipc_subscrb_kref_release(struct kref *kref) > { > kfree(container_of(kref,struct tipc_subscriber, kref)); > @@ -282,6 +302,9 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net, > memcpy(&sub->evt.s, s, sizeof(*s)); > atomic_inc(&tn->subscription_count); > kref_init(&sub->kref); > + > + INIT_WORK(&sub->work, tipc_subscrp_work); > + > return sub; > } > > @@ -379,6 +402,13 @@ int tipc_topsrv_start(struct net *net) > topsrv->tipc_conn_release = tipc_subscrb_release_cb; > > strncpy(topsrv->name, name, strlen(name) + 1); > + topsrv->wq = create_singlethread_workqueue(topsrv->name); > + if (!topsrv->wq) { > + kfree(saddr); > + kfree(topsrv); > + return -ENOMEM; > + } > + > tn->topsrv = topsrv; > atomic_set(&tn->subscription_count, 0); > > @@ -391,6 +421,8 @@ void tipc_topsrv_stop(struct net *net) > struct tipc_server *topsrv = tn->topsrv; > > tipc_server_stop(topsrv); > + flush_workqueue(topsrv->wq); > + destroy_workqueue(topsrv->wq); > kfree(topsrv->saddr); > kfree(topsrv); > } > diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h > index ee52957..50fc64b 100644 > --- a/net/tipc/subscr.h > +++ b/net/tipc/subscr.h > @@ -65,6 +65,7 @@ struct tipc_subscription { > struct list_head subscrp_list; > int swap; > struct tipc_event evt; > + struct work_struct work; > }; > > int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, > |
From: John T. <tho...@gm...> - 2017-03-09 21:11:26
|
Thanks Ying. I am away for a week and will run some tests next when back. Regards John On 10/03/2017 3:22 AM, "Ying Xue" <yin...@wi...> wrote: > commit d094c4d5f5 ("tipc: add subscription refcount to avoid invalid > delete") accidently introduce the following deadlock scenarios: > > CPU1: CPU2: > ---------- ---------------- > tipc_nametbl_publish > spin_lock_bh(&tn->nametbl_lock) > tipc_nametbl_insert_publ > tipc_nameseq_insert_publ > tipc_subscrp_report_overlap > tipc_subscrp_get > tipc_subscrp_send_event > tipc_close_conn > tipc_subscrb_release_cb > tipc_subscrb_delete > tipc_subscrp_put > tipc_subscrp_put > tipc_subscrp_kref_release > tipc_nametbl_unsubscribe > spin_lock_bh(&tn->nametbl_lock) > <<grab nametbl_lock again>> > > CPU1: CPU2: > ---------- ---------------- > tipc_nametbl_stop > spin_lock_bh(&tn->nametbl_lock) > tipc_purge_publications > tipc_nameseq_remove_publ > tipc_subscrp_report_overlap > tipc_subscrp_get > tipc_subscrp_send_event > tipc_close_conn > tipc_subscrb_release_cb > tipc_subscrb_delete > tipc_subscrp_put > tipc_subscrp_put > tipc_subscrp_kref_release > tipc_nametbl_unsubscribe > spin_lock_bh(&tn->nametbl_lock) > <<grab nametbl_lock again>> > > The root cause of two deadlocks is that we have to hold nametbl lock > when subscription is freed in tipc_subscrp_kref_release(). In order to > eliminate the need of taking nametbl lock in > tipc_subscrp_kref_release(), the functions protected by nametbl lock > in tipc_subscrp_kref_release() are moved to other places step by step > in the series. > > Change log: > v2: > As Parth's comments, subscription is still present name table after > it's expired. To fix it, we introduce a workqueue, and when > subscription's timer is expired, the subscription will be pushed to > the workqueue through its work. When the work is scheduled, the > subscription be deleted finally. > > Ying Xue (6): > tipc: advance the time of deleting subscription from > subscriber->subscrp_list > tipc: adjust the policy of holding subscription kref > tipc: adjust policy that sub->timer holds subscription kref > tipc: advance the time of calling tipc_nametbl_unsubscribe > tipc: remove unnecessary increasement of subscription refcount > tipc: delete expired subscription > > net/tipc/name_table.c | 2 ++ > net/tipc/server.h | 2 ++ > net/tipc/subscr.c | 64 ++++++++++++++++++++++++++++++ > ++++++--------------- > net/tipc/subscr.h | 4 ++++ > 4 files changed, 54 insertions(+), 18 deletions(-) > > -- > 2.7.4 > > |
From: Ying X. <yin...@wi...> - 2017-03-10 10:20:10
|
Thanks so much John! I am looking forward to seeing your final test results. Thanks, Ying On 03/10/2017 05:11 AM, John Thompson wrote: > Thanks Ying. > I am away for a week and will run some tests next when back. > Regards > John > > On 10/03/2017 3:22 AM, "Ying Xue" <yin...@wi... > <mailto:yin...@wi...>> wrote: > > commit d094c4d5f5 ("tipc: add subscription refcount to avoid invalid > delete") accidently introduce the following deadlock scenarios: > > CPU1: CPU2: > ---------- ---------------- > tipc_nametbl_publish > spin_lock_bh(&tn->nametbl_lock) > tipc_nametbl_insert_publ > tipc_nameseq_insert_publ > tipc_subscrp_report_overlap > tipc_subscrp_get > tipc_subscrp_send_event > tipc_close_conn > tipc_subscrb_release_cb > tipc_subscrb_delete > tipc_subscrp_put > tipc_subscrp_put > tipc_subscrp_kref_release > tipc_nametbl_unsubscribe > spin_lock_bh(&tn->nametbl_lock) > <<grab nametbl_lock again>> > > CPU1: CPU2: > ---------- ---------------- > tipc_nametbl_stop > spin_lock_bh(&tn->nametbl_lock) > tipc_purge_publications > tipc_nameseq_remove_publ > tipc_subscrp_report_overlap > tipc_subscrp_get > tipc_subscrp_send_event > tipc_close_conn > tipc_subscrb_release_cb > tipc_subscrb_delete > tipc_subscrp_put > tipc_subscrp_put > tipc_subscrp_kref_release > tipc_nametbl_unsubscribe > spin_lock_bh(&tn->nametbl_lock) > <<grab nametbl_lock again>> > > The root cause of two deadlocks is that we have to hold nametbl lock > when subscription is freed in tipc_subscrp_kref_release(). In order to > eliminate the need of taking nametbl lock in > tipc_subscrp_kref_release(), the functions protected by nametbl lock > in tipc_subscrp_kref_release() are moved to other places step by step > in the series. > > Change log: > v2: > As Parth's comments, subscription is still present name table after > it's expired. To fix it, we introduce a workqueue, and when > subscription's timer is expired, the subscription will be pushed to > the workqueue through its work. When the work is scheduled, the > subscription be deleted finally. > > Ying Xue (6): > tipc: advance the time of deleting subscription from > subscriber->subscrp_list > tipc: adjust the policy of holding subscription kref > tipc: adjust policy that sub->timer holds subscription kref > tipc: advance the time of calling tipc_nametbl_unsubscribe > tipc: remove unnecessary increasement of subscription refcount > tipc: delete expired subscription > > net/tipc/name_table.c | 2 ++ > net/tipc/server.h | 2 ++ > net/tipc/subscr.c | 64 > ++++++++++++++++++++++++++++++++++++--------------- > net/tipc/subscr.h | 4 ++++ > 4 files changed, 54 insertions(+), 18 deletions(-) > > -- > 2.7.4 > |
From: Parthasarathy B. <par...@er...> - 2017-03-13 17:43:53
|
Hi Ying, I have a new patch sets which fixes this issue using fixes from your patches. It deviates from your patch the following way: In my solution, the subscription refcount keeps track of a subscription with or without timer. I do not increment refcount for timer, and use the subscriber lock plus the del_timer to find outstanding timers. I will send the series shortly. I applied your series and ran some tests with it. If I run test against each patch individually, they seem to introduce new warnings/panics. I think every patch should be as correct as possible. I tried to moved around the patches in this series to get every patch correct, which led me to my series above. 1. tipc: advance the time of deleting subscription from subscriber->subscrp_list I get several warnings like: WARNING: CPU: 15 PID: 356 at lib/refcount.c:128 refcount_sub_and_test+0x5e/0x70 refcount_t: underflow; use-after-free. Modules linked in: tipc ip6_udp_tunnel udp_tunnel 9pnet_virtio 9p 9pnet virtio_net virtio_pci virtio_ring virtio CPU: 15 PID: 356 Comm: topsrv_tester Tainted: G W 4.10.0+ #111 Hardware name: Ericsson AB CXC1060320/ROJ208840/5, BIOS 4.6.5 11/07/2013 Call Trace: dump_stack+0x4d/0x72 __warn+0xd1/0xf0 warn_slowpath_fmt+0x4f/0x60 ? conn_put+0x12/0x30 [tipc] refcount_sub_and_test+0x5e/0x70 refcount_dec_and_test+0x11/0x20 tipc_subscrp_put+0x12/0x30 [tipc] tipc_subscrp_report_overlap+0xa7/0xc0 [tipc] tipc_nameseq_remove_publ+0x179/0x210 [tipc] tipc_nametbl_remove_publ+0x59/0xf0 [tipc] tipc_nametbl_withdraw+0x6c/0x130 [tipc] tipc_sk_withdraw+0xc0/0x100 [tipc] tipc_release+0x56/0x290 [tipc] sock_release+0x1f/0x80 sock_close+0x12/0x20 __fput+0xbe/0x200 ____fput+0xe/0x10 task_work_run+0x7e/0xb0 do_exit+0x3fd/0xc10 ? __do_page_fault+0x27a/0x500 do_group_exit+0x43/0xc0 SyS_exit_group+0x14/0x20 entry_SYSCALL_64_fastpath+0x13/0x94 RIP: 0033:0x7fcc289d22e9 RSP: 002b:00007ffc804f0ef8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fcc289d22e9 RDX: 0000000000000000 RSI: 00007fcc28cbe323 RDI: 0000000000000000 RBP: 00007fcc28cb9860 R08: 000000000000003c R09: 00000000000000e7 R10: ffffffffffffff90 R11: 0000000000000246 R12: 00007fcc28cb9860 R13: 00007fcc28cbec60 R14: 0000000000000000 R15: 0000000000000000 And, the one below as you forgot to delete the subscription list at subscription timeout. general protection fault: 0000 [#1] SMP Modules linked in: tipc ip6_udp_tunnel udp_tunnel 9pnet_virtio 9p 9pnet virtio_net virtio_pci virtio_ring virtio CPU: 17 PID: 222 Comm: kworker/u40:2 Tainted: G W 4.10.0+ #111 Hardware name: Ericsson AB CXC1060320/ROJ208840/5, BIOS 4.6.5 11/07/2013 Workqueue: tipc_send tipc_send_work [tipc] task: ffff880c444eef00 task.stack: ffffc90007734000 RIP: 0010:tipc_subscrb_subscrp_delete+0x6c/0x110 [tipc] RSP: 0018:ffffc90007737d48 EFLAGS: 00010246 RAX: dead000000000200 RBX: ffff880c3cebd480 RCX: 0000000000000101 RDX: dead000000000100 RSI: 0000000000000001 RDI: ffff880c3cebd480 RBP: ffffc90007737d70 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000011 R11: 0000000000000000 R12: dead0000000000a8 R13: 0000000000000000 R14: ffff880c44638088 R15: ffff880c446380a0 FS: 0000000000000000(0000) GS:ffff880c7f440000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000084f0c0 CR3: 0000000001c0b000 CR4: 00000000001406e0 Call Trace: tipc_subscrb_release_cb+0x17/0x30 [tipc] tipc_close_conn+0x80/0x90 [tipc] tipc_send_work+0x1a7/0x1b0 [tipc] process_one_work+0x145/0x410 worker_thread+0x69/0x4c0 kthread+0x128/0x160 ? process_one_work+0x410/0x410 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x29/0x40 Code: 49 89 c4 4d 85 ed 74 18 48 8d b3 80 00 00 00 ba 1c 00 00 00 4c 89 ef e8 e3 51 2e e1 85 c0 75 76 48 8b 53 58 48 8b 43 60 48 89 df <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 43 58 RIP: tipc_subscrb_subscrp_delete+0x6c/0x110 [tipc] RSP: ffffc90007737d48 2. tipc: adjust policy that sub->timer holds subscription kref My testcases fail, i think the waitq should fix it. So these commits should be squashed together. - create max_subscriptions for a non existent subscriber - wait for subscription timeout - create again the same max subscription and cancel them. They fail as: [16761.924321] Subscription rejected, limit reached (65535) [16761.929661] Subscription rejected, limit reached (65535) [16761.934966] Subscription rejected, limit reached (65535) [16761.940281] Subscription rejected, limit reached (65535) And with one of the your patches (the last one "tipc: delete expired subscription" which fixed it), I got: general protection fault: 0000 [#1] SMP Modules linked in: tipc(-) ip6_udp_tunnel udp_tunnel 9pnet_virtio 9p 9pnet virtio_net virtio_pci virtio_ring virtio [last unloaded: ip6_udp_tunnel] CPU: 15 PID: 8105 Comm: modprobe Tainted: G W 4.10.0+ #111 Hardware name: Ericsson AB CXC1060320/ROJ208840/5, BIOS 4.6.5 11/07/2013 task: ffff880c3c4fe2c0 task.stack: ffffc9002227c000 RIP: 0010:tipc_nametbl_stop+0xf9/0x190 [tipc] RSP: 0018:ffffc9002227fe00 EFLAGS: 00010286 RAX: 0200000000000000 RBX: ffff880c4d8f5700 RCX: ffff880c3c5b8ab0 RDX: ffffffff81c56cc0 RSI: 00000000000000c3 RDI: ffffea002ad57750 RBP: ffffc9002227fe50 R08: 00000000ffff880c R09: 00000000ffffffff R10: 00000000ffffffff R11: 0000000000000000 R12: 01ffffffffffff90 R13: ffffffff81d02e00 R14: ffff880c4bba1360 R15: 01ffffffffffff90 FS: 0000000000852880(0000) GS:ffff880c7f3c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000878038 CR3: 0000000c3bda5000 CR4: 00000000001406e0 Call Trace: tipc_exit_net+0x2a/0x40 [tipc] ops_exit_list.isra.8+0x38/0x60 unregister_pernet_operations+0x90/0xe0 unregister_pernet_subsys+0x21/0x30 tipc_exit+0x15/0xc6d [tipc] SyS_delete_module+0x192/0x200 ? SyS_read+0x49/0xb0 entry_SYSCALL_64_fastpath+0x13/0x94 RIP: 0033:0x4abc07 RSP: 002b:00007ffd9adbfe18 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000000000877e20 RCX: 00000000004abc07 RDX: 000000000000002e RSI: 0000000000000880 RDI: 00007ffd9adbfe20 RBP: 0000000000000110 R08: 0000000000000004 R09: 0000000000000000 R10: 000000000084cd30 R11: 0000000000000246 R12: 000000000084cce0 R13: 0000000000877f30 R14: 0000000000000200 R15: 0000000000000110 Code: 45 8b 4c 24 18 45 8b 44 24 14 4c 89 ef e8 40 ee ff ff 49 8d bc 24 80 00 00 00 be 80 00 00 00 4d 89 fc e8 3b 16 05 e1 49 8d 47 70 <49> 8b 57 70 4c 39 f0 4c 8d 7a 90 75 bb 48 8b 43 20 48 85 c0 74 RIP: tipc_nametbl_stop+0xf9/0x190 [tipc] RSP: ffffc9002227fe00 /Partha On 03/09/2017 03:37 PM, Ying Xue wrote: > To be honest, I don't very like this solution. But I cannot think one > solution is better than this one although I tried at least the following > two methods to solve the issue. > > - Introduce one timeout list to subscriber. When subscription is > expired, it will be moved to its timeout list. When name service is > deleted or inserted, we are going to release all subscriptions linked in > timeout list. After I tried to do the experiment, it's found we have to > make a big change for current code; > > - In subscription's timeout handler, when subsription timeout event is > sent to server through tipc_subscrp_send_event(), we can add a new flag > in the message. When server sends the timeout message to user space send > workqueue, it can send back a TIPC_SUB_CANCEL message to subscriber > through tipc_subscrb_rcv_cb() if that new flag is set. When the > TIPC_SUB_CANCEL message arrives at subscriber, the expired subscription > can be deleted at the moment. The approach is still infeasible due to > the same reason as previous one. > > In all, this solution is a bit elegant compared to above two. > > If you have any suggestion, please let me know. > > Thanks, > Ying > > On 03/09/2017 10:21 PM, Ying Xue wrote: >> After the design policy of subscription timeout is adjusted, >> subscription is still present in name table after it's expired, and >> hence the subscriber will receive new events for this subscription >> until the subscriber terminates. >> >> Now when subscription is expired, it will be first deleted from name >> table and its subscriber, and then schedule its work. Finally the >> subscription will be destroyed in workqueue asynchronously. >> >> Signed-off-by: Ying Xue <yin...@wi...> >> --- >> net/tipc/server.h | 2 ++ >> net/tipc/subscr.c | 32 ++++++++++++++++++++++++++++++++ >> net/tipc/subscr.h | 1 + >> 3 files changed, 35 insertions(+) >> >> diff --git a/net/tipc/server.h b/net/tipc/server.h >> index 34f8055..7d5588f 100644 >> --- a/net/tipc/server.h >> +++ b/net/tipc/server.h >> @@ -59,6 +59,7 @@ >> * @name: server name >> * @imp: message importance >> * @type: socket type >> + * @wq: workqueue >> */ >> struct tipc_server { >> struct idr conn_idr; >> @@ -78,6 +79,7 @@ struct tipc_server { >> char name[TIPC_SERVER_NAME_LEN]; >> int imp; >> int type; >> + struct workqueue_struct *wq; >> }; >> >> int tipc_conn_sendmsg(struct tipc_server *s, int conid, >> diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c >> index ea67cce..3dcb009 100644 >> --- a/net/tipc/subscr.c >> +++ b/net/tipc/subscr.c >> @@ -137,14 +137,34 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, >> static void tipc_subscrp_timeout(unsigned long data) >> { >> struct tipc_subscription *sub = (struct tipc_subscription *)data; >> + struct tipc_subscriber *subscriber = sub->subscriber; >> + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); >> >> /* Notify subscriber of timeout */ >> tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, >> TIPC_SUBSCR_TIMEOUT, 0, 0); >> >> + spin_lock_bh(&subscriber->lock); >> + list_del(&sub->subscrp_list); >> + tipc_nametbl_unsubscribe(sub); >> + spin_unlock_bh(&subscriber->lock); >> + >> + queue_work(tn->topsrv->wq, &sub->work); >> + >> tipc_subscrp_put(sub); >> } >> >> +static void tipc_subscrp_work(struct work_struct *work) >> +{ >> + struct tipc_subscription *sub = container_of(work, >> + struct tipc_subscription, work); >> + struct tipc_subscriber *subscriber = sub->subscriber; >> + >> + spin_lock_bh(&subscriber->lock); >> + tipc_subscrp_delete(sub); >> + spin_unlock_bh(&subscriber->lock); >> +} >> + >> static void tipc_subscrb_kref_release(struct kref *kref) >> { >> kfree(container_of(kref,struct tipc_subscriber, kref)); >> @@ -282,6 +302,9 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net, >> memcpy(&sub->evt.s, s, sizeof(*s)); >> atomic_inc(&tn->subscription_count); >> kref_init(&sub->kref); >> + >> + INIT_WORK(&sub->work, tipc_subscrp_work); >> + >> return sub; >> } >> >> @@ -379,6 +402,13 @@ int tipc_topsrv_start(struct net *net) >> topsrv->tipc_conn_release = tipc_subscrb_release_cb; >> >> strncpy(topsrv->name, name, strlen(name) + 1); >> + topsrv->wq = create_singlethread_workqueue(topsrv->name); >> + if (!topsrv->wq) { >> + kfree(saddr); >> + kfree(topsrv); >> + return -ENOMEM; >> + } >> + >> tn->topsrv = topsrv; >> atomic_set(&tn->subscription_count, 0); >> >> @@ -391,6 +421,8 @@ void tipc_topsrv_stop(struct net *net) >> struct tipc_server *topsrv = tn->topsrv; >> >> tipc_server_stop(topsrv); >> + flush_workqueue(topsrv->wq); >> + destroy_workqueue(topsrv->wq); >> kfree(topsrv->saddr); >> kfree(topsrv); >> } >> diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h >> index ee52957..50fc64b 100644 >> --- a/net/tipc/subscr.h >> +++ b/net/tipc/subscr.h >> @@ -65,6 +65,7 @@ struct tipc_subscription { >> struct list_head subscrp_list; >> int swap; >> struct tipc_event evt; >> + struct work_struct work; >> }; >> >> int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, >> |
From: Ying X. <yin...@wi...> - 2017-03-15 11:34:17
|
Hi Partha, Thank you for the review and improvement. On 03/14/2017 01:43 AM, Parthasarathy Bhuvaragan wrote: > Hi Ying, > > I have a new patch sets which fixes this issue using fixes from your > patches. It deviates from your patch the following way: > In my solution, the subscription refcount keeps track of a subscription > with or without timer. I do not increment refcount for timer, and use > the subscriber lock plus the del_timer to find outstanding timers. I > will send the series shortly. I have carefully reviewed your solution as well as your revised patches. Your method is pretty simpler than mine. Instead the thing I image is too complex. Now I can confirm that it's unnecessary to increase subscription refcount before its timer is started, and it's absolutely safe for us now. Good work Parth! > > I applied your series and ran some tests with it. If I run test against > each patch individually, they seem to introduce new warnings/panics. I > think every patch should be as correct as possible. I tried to moved > around the patches in this series to get every patch correct, which led > me to my series above. > If we can ensure every single patch of a patchset can independently work very well, that's very good. But in many cases, it's hard to reach that goal. The most reason is that on one hand, we must have patch easily readable for reviewer, on another hand, we must keep every single work well. So it is sometimes very hard. Anyway, your revised patches are very good. If Jon or other guys have no any different opinion, please submit them to net-next as soon as possible. Of course, if possible, please let John verify them again. Thanks, Ying > 1. tipc: advance the time of deleting subscription from > subscriber->subscrp_list > I get several warnings like: > WARNING: CPU: 15 PID: 356 at lib/refcount.c:128 > refcount_sub_and_test+0x5e/0x70 > refcount_t: underflow; use-after-free. > Modules linked in: tipc ip6_udp_tunnel udp_tunnel 9pnet_virtio 9p 9pnet > virtio_net virtio_pci virtio_ring virtio > CPU: 15 PID: 356 Comm: topsrv_tester Tainted: G W 4.10.0+ #111 > Hardware name: Ericsson AB CXC1060320/ROJ208840/5, BIOS 4.6.5 11/07/2013 > Call Trace: > dump_stack+0x4d/0x72 > __warn+0xd1/0xf0 > warn_slowpath_fmt+0x4f/0x60 > ? conn_put+0x12/0x30 [tipc] > refcount_sub_and_test+0x5e/0x70 > refcount_dec_and_test+0x11/0x20 > tipc_subscrp_put+0x12/0x30 [tipc] > tipc_subscrp_report_overlap+0xa7/0xc0 [tipc] > tipc_nameseq_remove_publ+0x179/0x210 [tipc] > tipc_nametbl_remove_publ+0x59/0xf0 [tipc] > tipc_nametbl_withdraw+0x6c/0x130 [tipc] > tipc_sk_withdraw+0xc0/0x100 [tipc] > tipc_release+0x56/0x290 [tipc] > sock_release+0x1f/0x80 > sock_close+0x12/0x20 > __fput+0xbe/0x200 > ____fput+0xe/0x10 > task_work_run+0x7e/0xb0 > do_exit+0x3fd/0xc10 > ? __do_page_fault+0x27a/0x500 > do_group_exit+0x43/0xc0 > SyS_exit_group+0x14/0x20 > entry_SYSCALL_64_fastpath+0x13/0x94 > RIP: 0033:0x7fcc289d22e9 > RSP: 002b:00007ffc804f0ef8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 > RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fcc289d22e9 > RDX: 0000000000000000 RSI: 00007fcc28cbe323 RDI: 0000000000000000 > RBP: 00007fcc28cb9860 R08: 000000000000003c R09: 00000000000000e7 > R10: ffffffffffffff90 R11: 0000000000000246 R12: 00007fcc28cb9860 > R13: 00007fcc28cbec60 R14: 0000000000000000 R15: 0000000000000000 > > And, the one below as you forgot to delete the subscription list at > subscription timeout. > general protection fault: 0000 [#1] SMP > Modules linked in: tipc ip6_udp_tunnel udp_tunnel 9pnet_virtio 9p 9pnet > virtio_net virtio_pci virtio_ring virtio > CPU: 17 PID: 222 Comm: kworker/u40:2 Tainted: G W 4.10.0+ #111 > Hardware name: Ericsson AB CXC1060320/ROJ208840/5, BIOS 4.6.5 11/07/2013 > Workqueue: tipc_send tipc_send_work [tipc] > task: ffff880c444eef00 task.stack: ffffc90007734000 > RIP: 0010:tipc_subscrb_subscrp_delete+0x6c/0x110 [tipc] > RSP: 0018:ffffc90007737d48 EFLAGS: 00010246 > RAX: dead000000000200 RBX: ffff880c3cebd480 RCX: 0000000000000101 > RDX: dead000000000100 RSI: 0000000000000001 RDI: ffff880c3cebd480 > RBP: ffffc90007737d70 R08: 0000000000000001 R09: 0000000000000000 > R10: 0000000000000011 R11: 0000000000000000 R12: dead0000000000a8 > R13: 0000000000000000 R14: ffff880c44638088 R15: ffff880c446380a0 > FS: 0000000000000000(0000) GS:ffff880c7f440000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 000000000084f0c0 CR3: 0000000001c0b000 CR4: 00000000001406e0 > Call Trace: > tipc_subscrb_release_cb+0x17/0x30 [tipc] > tipc_close_conn+0x80/0x90 [tipc] > tipc_send_work+0x1a7/0x1b0 [tipc] > process_one_work+0x145/0x410 > worker_thread+0x69/0x4c0 > kthread+0x128/0x160 > ? process_one_work+0x410/0x410 > ? kthread_create_on_node+0x40/0x40 > ret_from_fork+0x29/0x40 > Code: 49 89 c4 4d 85 ed 74 18 48 8d b3 80 00 00 00 ba 1c 00 00 00 4c 89 > ef e8 e3 51 2e e1 85 c0 75 76 48 8b 53 58 48 8b 43 60 48 89 df <48> 89 > 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 43 58 > RIP: tipc_subscrb_subscrp_delete+0x6c/0x110 [tipc] RSP: ffffc90007737d48 > > 2. tipc: adjust policy that sub->timer holds subscription kref > My testcases fail, i think the waitq should fix it. So these commits > should be squashed together. > - create max_subscriptions for a non existent subscriber > - wait for subscription timeout > - create again the same max subscription and cancel them. > They fail as: > [16761.924321] Subscription rejected, limit reached (65535) > [16761.929661] Subscription rejected, limit reached (65535) > [16761.934966] Subscription rejected, limit reached (65535) > [16761.940281] Subscription rejected, limit reached (65535) > > And with one of the your patches (the last one "tipc: delete expired > subscription" which fixed it), I got: > general protection fault: 0000 [#1] SMP > Modules linked in: tipc(-) ip6_udp_tunnel udp_tunnel 9pnet_virtio 9p > 9pnet virtio_net virtio_pci virtio_ring virtio [last unloaded: > ip6_udp_tunnel] > CPU: 15 PID: 8105 Comm: modprobe Tainted: G W 4.10.0+ #111 > Hardware name: Ericsson AB CXC1060320/ROJ208840/5, BIOS 4.6.5 11/07/2013 > task: ffff880c3c4fe2c0 task.stack: ffffc9002227c000 > RIP: 0010:tipc_nametbl_stop+0xf9/0x190 [tipc] > RSP: 0018:ffffc9002227fe00 EFLAGS: 00010286 > RAX: 0200000000000000 RBX: ffff880c4d8f5700 RCX: ffff880c3c5b8ab0 > RDX: ffffffff81c56cc0 RSI: 00000000000000c3 RDI: ffffea002ad57750 > RBP: ffffc9002227fe50 R08: 00000000ffff880c R09: 00000000ffffffff > R10: 00000000ffffffff R11: 0000000000000000 R12: 01ffffffffffff90 > R13: ffffffff81d02e00 R14: ffff880c4bba1360 R15: 01ffffffffffff90 > FS: 0000000000852880(0000) GS:ffff880c7f3c0000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 0000000000878038 CR3: 0000000c3bda5000 CR4: 00000000001406e0 > Call Trace: > tipc_exit_net+0x2a/0x40 [tipc] > ops_exit_list.isra.8+0x38/0x60 > unregister_pernet_operations+0x90/0xe0 > unregister_pernet_subsys+0x21/0x30 > tipc_exit+0x15/0xc6d [tipc] > SyS_delete_module+0x192/0x200 > ? SyS_read+0x49/0xb0 > entry_SYSCALL_64_fastpath+0x13/0x94 > RIP: 0033:0x4abc07 > RSP: 002b:00007ffd9adbfe18 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 > RAX: ffffffffffffffda RBX: 0000000000877e20 RCX: 00000000004abc07 > RDX: 000000000000002e RSI: 0000000000000880 RDI: 00007ffd9adbfe20 > RBP: 0000000000000110 R08: 0000000000000004 R09: 0000000000000000 > R10: 000000000084cd30 R11: 0000000000000246 R12: 000000000084cce0 > R13: 0000000000877f30 R14: 0000000000000200 R15: 0000000000000110 > Code: 45 8b 4c 24 18 45 8b 44 24 14 4c 89 ef e8 40 ee ff ff 49 8d bc 24 > 80 00 00 00 be 80 00 00 00 4d 89 fc e8 3b 16 05 e1 49 8d 47 70 <49> 8b > 57 70 4c 39 f0 4c 8d 7a 90 75 bb 48 8b 43 20 48 85 c0 74 > RIP: tipc_nametbl_stop+0xf9/0x190 [tipc] RSP: ffffc9002227fe00 > > /Partha > > On 03/09/2017 03:37 PM, Ying Xue wrote: >> To be honest, I don't very like this solution. But I cannot think one >> solution is better than this one although I tried at least the following >> two methods to solve the issue. >> >> - Introduce one timeout list to subscriber. When subscription is >> expired, it will be moved to its timeout list. When name service is >> deleted or inserted, we are going to release all subscriptions linked in >> timeout list. After I tried to do the experiment, it's found we have to >> make a big change for current code; >> >> - In subscription's timeout handler, when subsription timeout event is >> sent to server through tipc_subscrp_send_event(), we can add a new flag >> in the message. When server sends the timeout message to user space send >> workqueue, it can send back a TIPC_SUB_CANCEL message to subscriber >> through tipc_subscrb_rcv_cb() if that new flag is set. When the >> TIPC_SUB_CANCEL message arrives at subscriber, the expired subscription >> can be deleted at the moment. The approach is still infeasible due to >> the same reason as previous one. >> >> In all, this solution is a bit elegant compared to above two. >> >> If you have any suggestion, please let me know. >> >> Thanks, >> Ying >> >> On 03/09/2017 10:21 PM, Ying Xue wrote: >>> After the design policy of subscription timeout is adjusted, >>> subscription is still present in name table after it's expired, and >>> hence the subscriber will receive new events for this subscription >>> until the subscriber terminates. >>> >>> Now when subscription is expired, it will be first deleted from name >>> table and its subscriber, and then schedule its work. Finally the >>> subscription will be destroyed in workqueue asynchronously. >>> >>> Signed-off-by: Ying Xue <yin...@wi...> >>> --- >>> net/tipc/server.h | 2 ++ >>> net/tipc/subscr.c | 32 ++++++++++++++++++++++++++++++++ >>> net/tipc/subscr.h | 1 + >>> 3 files changed, 35 insertions(+) >>> >>> diff --git a/net/tipc/server.h b/net/tipc/server.h >>> index 34f8055..7d5588f 100644 >>> --- a/net/tipc/server.h >>> +++ b/net/tipc/server.h >>> @@ -59,6 +59,7 @@ >>> * @name: server name >>> * @imp: message importance >>> * @type: socket type >>> + * @wq: workqueue >>> */ >>> struct tipc_server { >>> struct idr conn_idr; >>> @@ -78,6 +79,7 @@ struct tipc_server { >>> char name[TIPC_SERVER_NAME_LEN]; >>> int imp; >>> int type; >>> + struct workqueue_struct *wq; >>> }; >>> >>> int tipc_conn_sendmsg(struct tipc_server *s, int conid, >>> diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c >>> index ea67cce..3dcb009 100644 >>> --- a/net/tipc/subscr.c >>> +++ b/net/tipc/subscr.c >>> @@ -137,14 +137,34 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, >>> static void tipc_subscrp_timeout(unsigned long data) >>> { >>> struct tipc_subscription *sub = (struct tipc_subscription *)data; >>> + struct tipc_subscriber *subscriber = sub->subscriber; >>> + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); >>> >>> /* Notify subscriber of timeout */ >>> tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, >>> TIPC_SUBSCR_TIMEOUT, 0, 0); >>> >>> + spin_lock_bh(&subscriber->lock); >>> + list_del(&sub->subscrp_list); >>> + tipc_nametbl_unsubscribe(sub); >>> + spin_unlock_bh(&subscriber->lock); >>> + >>> + queue_work(tn->topsrv->wq, &sub->work); >>> + >>> tipc_subscrp_put(sub); >>> } >>> >>> +static void tipc_subscrp_work(struct work_struct *work) >>> +{ >>> + struct tipc_subscription *sub = container_of(work, >>> + struct tipc_subscription, work); >>> + struct tipc_subscriber *subscriber = sub->subscriber; >>> + >>> + spin_lock_bh(&subscriber->lock); >>> + tipc_subscrp_delete(sub); >>> + spin_unlock_bh(&subscriber->lock); >>> +} >>> + >>> static void tipc_subscrb_kref_release(struct kref *kref) >>> { >>> kfree(container_of(kref,struct tipc_subscriber, kref)); >>> @@ -282,6 +302,9 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net, >>> memcpy(&sub->evt.s, s, sizeof(*s)); >>> atomic_inc(&tn->subscription_count); >>> kref_init(&sub->kref); >>> + >>> + INIT_WORK(&sub->work, tipc_subscrp_work); >>> + >>> return sub; >>> } >>> >>> @@ -379,6 +402,13 @@ int tipc_topsrv_start(struct net *net) >>> topsrv->tipc_conn_release = tipc_subscrb_release_cb; >>> >>> strncpy(topsrv->name, name, strlen(name) + 1); >>> + topsrv->wq = create_singlethread_workqueue(topsrv->name); >>> + if (!topsrv->wq) { >>> + kfree(saddr); >>> + kfree(topsrv); >>> + return -ENOMEM; >>> + } >>> + >>> tn->topsrv = topsrv; >>> atomic_set(&tn->subscription_count, 0); >>> >>> @@ -391,6 +421,8 @@ void tipc_topsrv_stop(struct net *net) >>> struct tipc_server *topsrv = tn->topsrv; >>> >>> tipc_server_stop(topsrv); >>> + flush_workqueue(topsrv->wq); >>> + destroy_workqueue(topsrv->wq); >>> kfree(topsrv->saddr); >>> kfree(topsrv); >>> } >>> diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h >>> index ee52957..50fc64b 100644 >>> --- a/net/tipc/subscr.h >>> +++ b/net/tipc/subscr.h >>> @@ -65,6 +65,7 @@ struct tipc_subscription { >>> struct list_head subscrp_list; >>> int swap; >>> struct tipc_event evt; >>> + struct work_struct work; >>> }; >>> >>> int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, >>> > |
From: Parthasarathy B. <par...@er...> - 2017-03-15 12:45:58
|
Hi Ying, Thanks for your feedback. I keep learning from your style of producing readable small patches doing one thing at a time. Hi John, Can you run your test with my series and provide some feedback? [PATCH v3 net-next 0/3] solve two deadlock issues: tipc: advance the time of calling tipc_nametbl_unsubscribe tipc: advance the time of deleting subscription from subscriber->subscrp_list tipc: adjust the policy of holding subscription kref regards partha ________________________________ From: Ying Xue <yin...@wi...> Sent: Wednesday, March 15, 2017 12:33 PM To: Parthasarathy Bhuvaragan; Jon Maloy; tho...@gm... Cc: tip...@li... Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription Hi Partha, Thank you for the review and improvement. On 03/14/2017 01:43 AM, Parthasarathy Bhuvaragan wrote: > Hi Ying, > > I have a new patch sets which fixes this issue using fixes from your > patches. It deviates from your patch the following way: > In my solution, the subscription refcount keeps track of a subscription > with or without timer. I do not increment refcount for timer, and use > the subscriber lock plus the del_timer to find outstanding timers. I > will send the series shortly. I have carefully reviewed your solution as well as your revised patches. Your method is pretty simpler than mine. Instead the thing I image is too complex. Now I can confirm that it's unnecessary to increase subscription refcount before its timer is started, and it's absolutely safe for us now. Good work Parth! > > I applied your series and ran some tests with it. If I run test against > each patch individually, they seem to introduce new warnings/panics. I > think every patch should be as correct as possible. I tried to moved > around the patches in this series to get every patch correct, which led > me to my series above. > If we can ensure every single patch of a patchset can independently work very well, that's very good. But in many cases, it's hard to reach that goal. The most reason is that on one hand, we must have patch easily readable for reviewer, on another hand, we must keep every single work well. So it is sometimes very hard. Anyway, your revised patches are very good. If Jon or other guys have no any different opinion, please submit them to net-next as soon as possible. Of course, if possible, please let John verify them again. Thanks, Ying |
From: Ying X. <yin...@wi...> - 2017-03-16 10:54:57
|
On 03/15/2017 08:45 PM, Parthasarathy Bhuvaragan wrote: > Hi Ying, > > Thanks for your feedback. I keep learning from your style of producing > readable > > small patches doing one thing at a time. No, in fact your change is better than mine :) Regards, Ying > > |
From: John T. <tho...@gm...> - 2017-03-16 20:35:13
|
On Thu, Mar 16, 2017 at 1:45 AM, Parthasarathy Bhuvaragan < par...@er...> wrote: > > Hi John, > > Can you run your test with my series and provide some feedback? > Hi Partha I have started running some tests and will be in touch later today with initial results. Thanks Jt > > [PATCH v3 net-next 0/3] solve two deadlock issues: > > tipc: advance the time of calling tipc_nametbl_unsubscribe > tipc: advance the time of deleting subscription from > subscriber->subscrp_list > tipc: adjust the policy of holding subscription kref > > regards > partha > ------------------------------ > *From:* Ying Xue <yin...@wi...> > *Sent:* Wednesday, March 15, 2017 12:33 PM > *To:* Parthasarathy Bhuvaragan; Jon Maloy; tho...@gm... > *Cc:* tip...@li... > *Subject:* Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete > expired subscription > > Hi Partha, > > Thank you for the review and improvement. > > On 03/14/2017 01:43 AM, Parthasarathy Bhuvaragan wrote: > > Hi Ying, > > > > I have a new patch sets which fixes this issue using fixes from your > > patches. It deviates from your patch the following way: > > In my solution, the subscription refcount keeps track of a subscription > > with or without timer. I do not increment refcount for timer, and use > > the subscriber lock plus the del_timer to find outstanding timers. I > > will send the series shortly. > > I have carefully reviewed your solution as well as your revised patches. > Your method is pretty simpler than mine. Instead the thing I image is > too complex. Now I can confirm that it's unnecessary to increase > subscription refcount before its timer is started, and it's absolutely > safe for us now. Good work Parth! > > > > > I applied your series and ran some tests with it. If I run test against > > each patch individually, they seem to introduce new warnings/panics. I > > think every patch should be as correct as possible. I tried to moved > > around the patches in this series to get every patch correct, which led > > me to my series above. > > > > If we can ensure every single patch of a patchset can independently work > very well, that's very good. But in many cases, it's hard to reach that > goal. The most reason is that on one hand, we must have patch easily > readable for reviewer, on another hand, we must keep every single work > well. So it is sometimes very hard. > > Anyway, your revised patches are very good. If Jon or other guys have no > any different opinion, please submit them to net-next as soon as possible. > > Of course, if possible, please let John verify them again. > > Thanks, > Ying > > |
From: John T. <tho...@gm...> - 2017-03-17 02:44:37
|
On Fri, Mar 17, 2017 at 9:35 AM, John Thompson <tho...@gm...> wrote: > > > On Thu, Mar 16, 2017 at 1:45 AM, Parthasarathy Bhuvaragan < > par...@er...> wrote: >> >> Hi John, >> >> Can you run your test with my series and provide some feedback? >> > Hi Partha > I have started running some tests and will be in touch later today with > initial results. > Thanks > Jt > Hi Partha, My testing is looking good. I have generally seen problems reasonably quickly. This patch series seems good. I will run a longer run test over the weekend to be certain but it should be ok. Thanks, JT |
From: John T. <tho...@gm...> - 2017-03-20 01:18:50
|
On Fri, Mar 17, 2017 at 3:44 PM, John Thompson <tho...@gm...> wrote: > > > On Fri, Mar 17, 2017 at 9:35 AM, John Thompson <tho...@gm...> > wrote: > >> >> >> On Thu, Mar 16, 2017 at 1:45 AM, Parthasarathy Bhuvaragan < >> par...@er...> wrote: >>> >>> Hi John, >>> >>> Can you run your test with my series and provide some feedback? >>> >> Hi Partha >> I have started running some tests and will be in touch later today with >> initial results. >> Thanks >> Jt >> > > Hi Partha, > My testing is looking good. I have generally seen problems reasonably > quickly. This patch series seems good. I will run a longer run test over > the weekend to be certain but it should be ok. > > Thanks, > JT > Hi Partha, My testing over the weekend has looked good. I have not seen any soft lockups or other errors of any form. JT |
From: Ying X. <yin...@wi...> - 2017-03-20 01:30:38
|
On 03/20/2017 09:18 AM, John Thompson wrote: > Hi Partha, > > My testing over the weekend has looked good. I have not seen any soft > lockups or other errors of any form. Good news! Thanks John!! |
From: Jon M. <jon...@er...> - 2017-03-20 12:26:09
|
Series acked by me. Please send to ‘net’ since this problem really occurs frequently even in the released version. ///jon From: John Thompson [mailto:tho...@gm...] Sent: Sunday, March 19, 2017 09:19 PM To: Parthasarathy Bhuvaragan <par...@er...> Cc: Ying Xue <yin...@wi...>; tip...@li...; Jon Maloy <jon...@er...> Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription On Fri, Mar 17, 2017 at 3:44 PM, John Thompson <tho...@gm...<mailto:tho...@gm...>> wrote: On Fri, Mar 17, 2017 at 9:35 AM, John Thompson <tho...@gm...<mailto:tho...@gm...>> wrote: On Thu, Mar 16, 2017 at 1:45 AM, Parthasarathy Bhuvaragan <par...@er...<mailto:par...@er...>> wrote: Hi John, Can you run your test with my series and provide some feedback? Hi Partha I have started running some tests and will be in touch later today with initial results. Thanks Jt Hi Partha, My testing is looking good. I have generally seen problems reasonably quickly. This patch series seems good. I will run a longer run test over the weekend to be certain but it should be ok. Thanks, JT Hi Partha, My testing over the weekend has looked good. I have not seen any soft lockups or other errors of any form. JT |
From: Parthasarathy B. <par...@er...> - 2017-03-20 16:00:52
|
Hi, In that case I plan to send the only the first fix to net: tipc: advance the time of calling tipc_nametbl_unsubscribe Then wait till net is merged to netnext and submit the following to netnext: tipc: advance the time of deleting subscription from subscriber->subscrp_list tipc: adjust the policy of holding subscription kref Any objections? /Partha ________________________________ From: Jon Maloy Sent: Monday, March 20, 2017 1:25:57 PM To: John Thompson; Parthasarathy Bhuvaragan Cc: Ying Xue; tip...@li... Subject: RE: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription Series acked by me. Please send to ‘net’ since this problem really occurs frequently even in the released version. ///jon From: John Thompson [mailto:tho...@gm...] Sent: Sunday, March 19, 2017 09:19 PM To: Parthasarathy Bhuvaragan <par...@er...> Cc: Ying Xue <yin...@wi...>; tip...@li...; Jon Maloy <jon...@er...> Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription On Fri, Mar 17, 2017 at 3:44 PM, John Thompson <tho...@gm...<mailto:tho...@gm...>> wrote: On Fri, Mar 17, 2017 at 9:35 AM, John Thompson <tho...@gm...<mailto:tho...@gm...>> wrote: On Thu, Mar 16, 2017 at 1:45 AM, Parthasarathy Bhuvaragan <par...@er...<mailto:par...@er...>> wrote: Hi John, Can you run your test with my series and provide some feedback? Hi Partha I have started running some tests and will be in touch later today with initial results. Thanks Jt Hi Partha, My testing is looking good. I have generally seen problems reasonably quickly. This patch series seems good. I will run a longer run test over the weekend to be certain but it should be ok. Thanks, JT Hi Partha, My testing over the weekend has looked good. I have not seen any soft lockups or other errors of any form. JT |
From: Jon M. <jon...@er...> - 2017-03-20 19:17:49
|
No objections. Just go ahead. ///jon From: Parthasarathy Bhuvaragan Sent: Monday, March 20, 2017 12:00 PM To: Jon Maloy <jon...@er...>; John Thompson <tho...@gm...> Cc: Ying Xue <yin...@wi...>; tip...@li... Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription Hi, In that case I plan to send the only the first fix to net: tipc: advance the time of calling tipc_nametbl_unsubscribe Then wait till net is merged to netnext and submit the following to netnext: tipc: advance the time of deleting subscription from subscriber->subscrp_list tipc: adjust the policy of holding subscription kref Any objections? /Partha ________________________________ From: Jon Maloy Sent: Monday, March 20, 2017 1:25:57 PM To: John Thompson; Parthasarathy Bhuvaragan Cc: Ying Xue; tip...@li...<mailto:tip...@li...> Subject: RE: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription Series acked by me. Please send to 'net' since this problem really occurs frequently even in the released version. ///jon From: John Thompson [mailto:tho...@gm...] Sent: Sunday, March 19, 2017 09:19 PM To: Parthasarathy Bhuvaragan <par...@er...<mailto:par...@er...>> Cc: Ying Xue <yin...@wi...<mailto:yin...@wi...>>; tip...@li...<mailto:tip...@li...>; Jon Maloy <jon...@er...<mailto:jon...@er...>> Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription On Fri, Mar 17, 2017 at 3:44 PM, John Thompson <tho...@gm...<mailto:tho...@gm...>> wrote: On Fri, Mar 17, 2017 at 9:35 AM, John Thompson <tho...@gm...<mailto:tho...@gm...>> wrote: On Thu, Mar 16, 2017 at 1:45 AM, Parthasarathy Bhuvaragan <par...@er...<mailto:par...@er...>> wrote: Hi John, Can you run your test with my series and provide some feedback? Hi Partha I have started running some tests and will be in touch later today with initial results. Thanks Jt Hi Partha, My testing is looking good. I have generally seen problems reasonably quickly. This patch series seems good. I will run a longer run test over the weekend to be certain but it should be ok. Thanks, JT Hi Partha, My testing over the weekend has looked good. I have not seen any soft lockups or other errors of any form. JT |
From: Jon M. <jon...@er...> - 2017-03-20 19:30:19
|
BTW, I think you should add a reference to the patch where the subscription refcount was introduced in the log message, so that Andrew will know how far back he should apply this patch on "stable". ///jon > -----Original Message----- > From: Jon Maloy [mailto:jon...@er...] > Sent: Monday, March 20, 2017 03:18 PM > To: Parthasarathy Bhuvaragan <par...@er...>; > John Thompson <tho...@gm...> > Cc: tip...@li... > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > No objections. Just go ahead. > > ///jon > > > From: Parthasarathy Bhuvaragan > Sent: Monday, March 20, 2017 12:00 PM > To: Jon Maloy <jon...@er...>; John Thompson > <tho...@gm...> > Cc: Ying Xue <yin...@wi...>; tipc- > dis...@li... > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > > Hi, > > In that case I plan to send the only the first fix to net: > > tipc: advance the time of calling tipc_nametbl_unsubscribe > > Then wait till net is merged to netnext and submit the following to netnext: > tipc: advance the time of deleting subscription from > subscriber->subscrp_list > tipc: adjust the policy of holding subscription kref > > Any objections? > > /Partha > > ________________________________ > From: Jon Maloy > Sent: Monday, March 20, 2017 1:25:57 PM > To: John Thompson; Parthasarathy Bhuvaragan > Cc: Ying Xue; tip...@li...<mailto:tipc- > dis...@li...> > Subject: RE: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > Series acked by me. Please send to 'net' since this problem really occurs > frequently even in the released version. > > ///jon > > > From: John Thompson [mailto:tho...@gm...] > Sent: Sunday, March 19, 2017 09:19 PM > To: Parthasarathy Bhuvaragan > <par...@er...<mailto:parthasarathy.bhuvaraga > n...@er...>> > Cc: Ying Xue <yin...@wi...<mailto:yin...@wi...>>; > tip...@li...<mailto:tipc- > dis...@li...>; Jon Maloy > <jon...@er...<mailto:jon...@er...>> > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > > On Fri, Mar 17, 2017 at 3:44 PM, John Thompson > <tho...@gm...<mailto:tho...@gm...>> wrote: > > > On Fri, Mar 17, 2017 at 9:35 AM, John Thompson > <tho...@gm...<mailto:tho...@gm...>> wrote: > > > On Thu, Mar 16, 2017 at 1:45 AM, Parthasarathy Bhuvaragan > <par...@er...<mailto:parthasarathy.bhuvaraga > n...@er...>> wrote: > > Hi John, > > Can you run your test with my series and provide some feedback? > Hi Partha > I have started running some tests and will be in touch later today with initial > results. > Thanks > Jt > > Hi Partha, > My testing is looking good. I have generally seen problems reasonably > quickly. This patch series seems good. I will run a longer run test over the > weekend to be certain but it should be ok. > > Thanks, > JT > Hi Partha, > > My testing over the weekend has looked good. I have not seen any soft > lockups or other errors of any form. > > JT > > ------------------------------------------------------------------------------ > Check out the vibrant tech community on one of the world's most engaging > tech sites, Slashdot.org! http://sdm.link/slashdot > _______________________________________________ > tipc-discussion mailing list > tip...@li... > https://lists.sourceforge.net/lists/listinfo/tipc-discussion |
From: Parthasarathy B. <par...@er...> - 2017-03-21 08:20:17
|
Hi Jon, Ok, I will submit it today. I usually add the Fixes tag in the tag section instead of specifying it in the body of the commit message like you do. I followed the instructions specified in: http://lxr.free-electrons.com/source/Documentation/SubmittingPatches?v=3.16#L135 This commit to net has the following tag: Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") /Partha ________________________________ From: Jon Maloy Sent: Monday, March 20, 2017 8:30 PM To: Jon Maloy; Parthasarathy Bhuvaragan; John Thompson Cc: tip...@li... Subject: RE: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription BTW, I think you should add a reference to the patch where the subscription refcount was introduced in the log message, so that Andrew will know how far back he should apply this patch on "stable". ///jon > -----Original Message----- > From: Jon Maloy [mailto:jon...@er...] > Sent: Monday, March 20, 2017 03:18 PM > To: Parthasarathy Bhuvaragan <par...@er...>; > John Thompson <tho...@gm...> > Cc: tip...@li... > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > No objections. Just go ahead. > > ///jon > > > From: Parthasarathy Bhuvaragan > Sent: Monday, March 20, 2017 12:00 PM > To: Jon Maloy <jon...@er...>; John Thompson > <tho...@gm...> > Cc: Ying Xue <yin...@wi...>; tipc- > dis...@li... > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > > Hi, > > In that case I plan to send the only the first fix to net: > > tipc: advance the time of calling tipc_nametbl_unsubscribe > > Then wait till net is merged to netnext and submit the following to netnext: > tipc: advance the time of deleting subscription from > subscriber->subscrp_list > tipc: adjust the policy of holding subscription kref > > Any objections? > > /Partha > > ________________________________ > From: Jon Maloy > Sent: Monday, March 20, 2017 1:25:57 PM > To: John Thompson; Parthasarathy Bhuvaragan > Cc: Ying Xue; tip...@li...<mailto:tipc- > dis...@li...> > Subject: RE: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > Series acked by me. Please send to 'net' since this problem really occurs > frequently even in the released version. > > ///jon > > > From: John Thompson [mailto:tho...@gm...] > Sent: Sunday, March 19, 2017 09:19 PM > To: Parthasarathy Bhuvaragan > <par...@er...<mailto:parthasarathy.bhuvaraga > n...@er...>> > Cc: Ying Xue <yin...@wi...<mailto:yin...@wi...>>; > tip...@li...<mailto:tipc- > dis...@li...>; Jon Maloy > <jon...@er...<mailto:jon...@er...>> > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > > On Fri, Mar 17, 2017 at 3:44 PM, John Thompson > <tho...@gm...<mailto:tho...@gm...>> wrote: > > > On Fri, Mar 17, 2017 at 9:35 AM, John Thompson > <tho...@gm...<mailto:tho...@gm...>> wrote: > > > On Thu, Mar 16, 2017 at 1:45 AM, Parthasarathy Bhuvaragan > <par...@er...<mailto:parthasarathy.bhuvaraga > n...@er...>> wrote: > > Hi John, > > Can you run your test with my series and provide some feedback? > Hi Partha > I have started running some tests and will be in touch later today with initial > results. > Thanks > Jt > > Hi Partha, > My testing is looking good. I have generally seen problems reasonably > quickly. This patch series seems good. I will run a longer run test over the > weekend to be certain but it should be ok. > > Thanks, > JT > Hi Partha, > > My testing over the weekend has looked good. I have not seen any soft > lockups or other errors of any form. > > JT > > ------------------------------------------------------------------------------ > Check out the vibrant tech community on one of the world's most engaging > tech sites, Slashdot.org! http://sdm.link/slashdot > _______________________________________________ > tipc-discussion mailing list > tip...@li... > https://lists.sourceforge.net/lists/listinfo/tipc-discussion |
From: Jon M. <jon...@er...> - 2017-03-21 11:36:47
|
From: Parthasarathy Bhuvaragan Sent: Tuesday, March 21, 2017 04:20 AM To: Jon Maloy <jon...@er...>; John Thompson <tho...@gm...> Cc: tip...@li... Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription Hi Jon, Ok, I will submit it today. I usually add the Fixes tag in the tag section instead of specifying it in the body of the commit message like you do. I followed the instructions specified in: http://lxr.free-electrons.com/source/Documentation/SubmittingPatches?v=3.16#L135 This commit to net has the following tag: Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") No problem. I didn't notice that. ///jon /Partha ________________________________ From: Jon Maloy Sent: Monday, March 20, 2017 8:30 PM To: Jon Maloy; Parthasarathy Bhuvaragan; John Thompson Cc: tip...@li...<mailto:tip...@li...> Subject: RE: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription BTW, I think you should add a reference to the patch where the subscription refcount was introduced in the log message, so that Andrew will know how far back he should apply this patch on "stable". ///jon > -----Original Message----- > From: Jon Maloy [mailto:jon...@er...] > Sent: Monday, March 20, 2017 03:18 PM > To: Parthasarathy Bhuvaragan <par...@er...<mailto:par...@er...>>; > John Thompson <tho...@gm...<mailto:tho...@gm...>> > Cc: tip...@li...<mailto:tip...@li...> > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > No objections. Just go ahead. > > ///jon > > > From: Parthasarathy Bhuvaragan > Sent: Monday, March 20, 2017 12:00 PM > To: Jon Maloy <jon...@er...<mailto:jon...@er...>>; John Thompson > <tho...@gm...<mailto:tho...@gm...>> > Cc: Ying Xue <yin...@wi...<mailto:yin...@wi...>>; tipc- > dis...@li...<mailto:dis...@li...> > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > > Hi, > > In that case I plan to send the only the first fix to net: > > tipc: advance the time of calling tipc_nametbl_unsubscribe > > Then wait till net is merged to netnext and submit the following to netnext: > tipc: advance the time of deleting subscription from > subscriber->subscrp_list > tipc: adjust the policy of holding subscription kref > > Any objections? > > /Partha > > ________________________________ > From: Jon Maloy > Sent: Monday, March 20, 2017 1:25:57 PM > To: John Thompson; Parthasarathy Bhuvaragan > Cc: Ying Xue; tip...@li...<mailto:tipc-<mailto:tip...@li...%3cmailto:tipc-> > dis...@li...<mailto:dis...@li...>> > Subject: RE: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > Series acked by me. Please send to 'net' since this problem really occurs > frequently even in the released version. > > ///jon > > > From: John Thompson [mailto:tho...@gm...] > Sent: Sunday, March 19, 2017 09:19 PM > To: Parthasarathy Bhuvaragan > <par...@er...<mailto:parthasarathy.bhuvaraga <mailto:par...@er...%3cmailto:parthasarathy.bhuvaraga%0b>> n...@er...<mailto:n...@er...>>> > Cc: Ying Xue <yin...@wi...<mailto:yin...@wi...<mailto:yin...@wi...%3cmailto:yin...@wi...>>>; > tip...@li...<mailto:tipc-<mailto:tip...@li...%3cmailto:tipc-> > dis...@li...<mailto:dis...@li...>>; Jon Maloy > <jon...@er...<mailto:jon...@er...<mailto:jon...@er...%3cmailto:jon...@er...>>> > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > > On Fri, Mar 17, 2017 at 3:44 PM, John Thompson > <tho...@gm...<mailto:tho...@gm...<mailto:tho...@gm...%3cmailto:tho...@gm...>>> wrote: > > > On Fri, Mar 17, 2017 at 9:35 AM, John Thompson > <tho...@gm...<mailto:tho...@gm...<mailto:tho...@gm...%3cmailto:tho...@gm...>>> wrote: > > > On Thu, Mar 16, 2017 at 1:45 AM, Parthasarathy Bhuvaragan > <par...@er...<mailto:parthasarathy.bhuvaraga <mailto:par...@er...%3cmailto:parthasarathy.bhuvaraga%0b>> n...@er...<mailto:n...@er...>>> wrote: > > Hi John, > > Can you run your test with my series and provide some feedback? > Hi Partha > I have started running some tests and will be in touch later today with initial > results. > Thanks > Jt > > Hi Partha, > My testing is looking good. I have generally seen problems reasonably > quickly. This patch series seems good. I will run a longer run test over the > weekend to be certain but it should be ok. > > Thanks, > JT > Hi Partha, > > My testing over the weekend has looked good. I have not seen any soft > lockups or other errors of any form. > > JT > > ------------------------------------------------------------------------------ > Check out the vibrant tech community on one of the world's most engaging > tech sites, Slashdot.org! http://sdm.link/slashdot > _______________________________________________ > tipc-discussion mailing list > tip...@li...<mailto:tip...@li...> > https://lists.sourceforge.net/lists/listinfo/tipc-discussion |
From: Parthasarathy B. <par...@er...> - 2017-03-13 17:58:16
|
commit d094c4d5f5 ("tipc: add subscription refcount to avoid invalid delete") accidently introduce the following deadlock scenarios: CPU1: CPU2: ---------- ---------------- tipc_nametbl_publish spin_lock_bh(&tn->nametbl_lock) tipc_nametbl_insert_publ tipc_nameseq_insert_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <<grab nametbl_lock again>> CPU1: CPU2: ---------- ---------------- tipc_nametbl_stop spin_lock_bh(&tn->nametbl_lock) tipc_purge_publications tipc_nameseq_remove_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <<grab nametbl_lock again>> The root cause of two deadlocks is that we have to hold nametbl lock when subscription is freed in tipc_subscrp_kref_release(). In this series we simplify tipc_subscrp_kref_release(), by moving functions which acquire locks to other relevant places. Patch #1: bug fix for the above issue. Patch #2-3: improve subscription locking/refcounting. Change log: v3: In v3, the subscription refcount keeps track of a subscription with or without timer. We do not increment refcount for timer, and use the subscriber lock plus the del_timer to find outstanding timers. After this change tipc_subscrp_kref_release() is lock free. v2: As Parth's comments, subscription is still present name table after it's expired. To fix it, we introduce a workqueue, and when subscription's timer is expired, the subscription will be pushed to the workqueue through its work. When the work is scheduled, the subscription be deleted finally. Ying Xue (3): tipc: advance the time of calling tipc_nametbl_unsubscribe tipc: advance the time of deleting subscription from subscriber->subscrp_list tipc: adjust the policy of holding subscription kref net/tipc/name_table.c | 2 ++ net/tipc/subscr.c | 24 ++++++++++-------------- net/tipc/subscr.h | 3 +++ 3 files changed, 15 insertions(+), 14 deletions(-) -- 2.1.4 |
[tipc-discussion] [PATCH v3 net-next 1/3] tipc: advance the time of
calling tipc_nametbl_unsubscribe
From: Parthasarathy B. <par...@er...> - 2017-03-13 17:58:18
|
From: Ying Xue <yin...@wi...> Until now, tipc_nametbl_unsubscribe() is called at subscriptions reference count cleanup. Usually the subscriptions cleanup is called at subscription timeout or at subscription cancel or at subscriber delete. We have ignored the possibility of this being called from other locations, which causes deadlock as we try to grab the tn->nametbl_lock while holding it already. CPU1: CPU2: ---------- ---------------- tipc_nametbl_publish spin_lock_bh(&tn->nametbl_lock) tipc_nametbl_insert_publ tipc_nameseq_insert_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <<grab nametbl_lock again>> CPU1: CPU2: ---------- ---------------- tipc_nametbl_stop spin_lock_bh(&tn->nametbl_lock) tipc_purge_publications tipc_nameseq_remove_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <<grab nametbl_lock again>> In this commit, we advance the calling of tipc_nametbl_unsubscribe() from the refcount cleanup to the intended callers. Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") Reported-by: John Thompson <tho...@gm...> Reported-by: Jon Maloy <jon...@er...> Signed-off-by: Ying Xue <yin...@wi...> Signed-off-by: Parthasarathy Bhuvaragan <par...@er...> --- v2: Call tipc_nametbl_unsubscribe() at tipc_subscrp_timeout() too. This should fix the deadlock and also ensure that applications will not receive any new notifications after timeout event. --- net/tipc/subscr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 9d94e65d0894..271cd66e4b3b 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -141,6 +141,11 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(&subscriber->lock); + tipc_nametbl_unsubscribe(sub); + spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -173,7 +178,6 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(&subscriber->lock); - tipc_nametbl_unsubscribe(sub); list_del(&sub->subscrp_list); atomic_dec(&tn->subscription_count); spin_unlock_bh(&subscriber->lock); @@ -205,6 +209,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) continue; + tipc_nametbl_unsubscribe(sub); tipc_subscrp_get(sub); spin_unlock_bh(&subscriber->lock); tipc_subscrp_delete(sub); -- 2.1.4 |
From: Parthasarathy B. <par...@er...> - 2017-03-13 17:58:20
|
In filter_connect, we use waitqueue_active() to check for any connections to wakeup. But waitqueue_active() is missing memory barriers while accessing the critical sections, leading to inconsistent results. In this commit, we replace this with an SMB safe wq_has_sleeper(). Signed-off-by: Parthasarathy Bhuvaragan <par...@er...> --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 79e628cd08a9..ce6ed0955e36 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1577,7 +1577,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return true; /* If empty 'ACK-' message, wake up sleeping connect() */ - if (waitqueue_active(sk_sleep(sk))) + if (wq_has_sleeper(rcu_dereference(sk->sk_wq))) wake_up_interruptible(sk_sleep(sk)); /* 'ACK-' message is neither accepted nor rejected: */ -- 2.1.4 |