From: Hans F. <han...@er...> - 2014-03-31 11:07:56
|
osaf/services/saf/amf/amfnd/clc.cc | 26 +++++++++++++++++++------- osaf/services/saf/amf/amfnd/err.cc | 30 ------------------------------ 2 files changed, 19 insertions(+), 37 deletions(-) During component fail-over, a standby component can be activated before cleanup of the faulty component has finished, effectively introducing a split-brain condition at the component level. This happens because the SUSI response message is sent to the director without waiting for the cleanup to complete. Fix this by sending the response only after the cleanup has finished. diff --git a/osaf/services/saf/amf/amfnd/clc.cc b/osaf/services/saf/amf/amfnd/clc.cc --- a/osaf/services/saf/amf/amfnd/clc.cc +++ b/osaf/services/saf/amf/amfnd/clc.cc @@ -846,13 +846,6 @@ uint32_t avnd_comp_clc_fsm_run(AVND_CB * /* get the final presence state */ final_st = comp->pres; - if (ev == AVND_COMP_CLC_PRES_FSM_EV_CLEANUP || ev == AVND_COMP_CLC_PRES_FSM_EV_TERM_SUCC) { - /* we need to delete all curr_info, pxied will have cbk for cleanup */ - if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) { - avnd_comp_curr_info_del(cb, comp); - } - } - TRACE_1("Exited CLC FSM"); TRACE_1("'%s':FSM Enter presence state: '%s':FSM Exit presence state:%s", comp->name.value,pres_state[prv_st],pres_state[final_st]); @@ -1629,6 +1622,15 @@ uint32_t avnd_comp_clc_xxxing_cleansucc_ goto done; } + /* + * su-sis may be in assigning/removing state. signal csi + * assign/remove done so that su-si assignment/removal algo can proceed. 
+ */ + avnd_comp_cmplete_all_assignment(cb, comp); + + /* delete curr info of the failed comp */ + avnd_comp_curr_info_del(cb, comp); + if ((clc_info->inst_retry_cnt < clc_info->inst_retry_max) && (AVND_COMP_INST_EXIT_CODE_NO_RETRY != clc_info->inst_code_rcvd)) { /* => keep retrying */ @@ -1971,6 +1973,7 @@ uint32_t avnd_comp_clc_terming_termsucc_ if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) { m_AVND_COMP_REG_PARAM_RESET(cb, comp); m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp, AVND_CKPT_COMP_CONFIG); + avnd_comp_curr_info_del(cb, comp); } TRACE_LEAVE(); @@ -2078,6 +2081,15 @@ uint32_t avnd_comp_clc_terming_cleansucc } } + /* + * su-sis may be in assigning/removing state. signal csi + * assign/remove done so that su-si assignment/removal algo can proceed. + */ + avnd_comp_cmplete_all_assignment(cb, comp); + + /* delete curr info of the failed comp */ + avnd_comp_curr_info_del(cb, comp); + /* reset the comp-reg & instantiate params */ if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) { m_AVND_COMP_REG_PARAM_RESET(cb, comp); diff --git a/osaf/services/saf/amf/amfnd/err.cc b/osaf/services/saf/amf/amfnd/err.cc --- a/osaf/services/saf/amf/amfnd/err.cc +++ b/osaf/services/saf/amf/amfnd/err.cc @@ -521,13 +521,6 @@ uint32_t avnd_err_recover(AVND_CB *cb, A return rc; m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp, AVND_CKPT_COMP_OPER_STATE); - /* - * SU may be in the middle of SU_SI in assigning/removing state. - * signal csi assign/remove done so that su-si assignment/removal - * algo can proceed. - */ - avnd_comp_cmplete_all_assignment(cb, comp); - /* clean up the comp */ rc = avnd_comp_clc_fsm_run(cb, comp, AVND_COMP_CLC_PRES_FSM_EV_CLEANUP); @@ -702,23 +695,12 @@ uint32_t avnd_err_rcvr_comp_failover(AVN m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_DISABLED); m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, AVND_CKPT_SU_OPER_STATE); - /* - * su-sis may be in assigning/removing state. signal csi - * assign/remove done so that su-si assignment/removal algo can proceed. 
- */ - avnd_comp_cmplete_all_assignment(cb, failed_comp); - /* We are now in the context of failover, forget the restart */ if (su->pres == SA_AMF_PRESENCE_RESTARTING || m_AVND_SU_IS_RESTART(su)) { m_AVND_SU_RESTART_RESET(su); m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, AVND_CKPT_SU_FLAG_CHANGE); } - /* delete curr info of the failed comp */ - rc = avnd_comp_curr_info_del(cb, failed_comp); - if (NCSCC_RC_SUCCESS != rc) - goto done; - // TODO: there should be no difference between PI/NPI comps if (m_AVND_SU_IS_PREINSTANTIABLE(su)) { /* clean the failed comp */ @@ -832,24 +814,12 @@ uint32_t avnd_err_rcvr_node_switchover(A goto done; } - - /* - * su-sis may be in assigning/removing state. signal csi - * assign/remove done so that su-si assignment/removal algo can proceed. - */ - avnd_comp_cmplete_all_assignment(cb, failed_comp); - /* We are now in the context of failover, forget the restart */ if (failed_su->pres == SA_AMF_PRESENCE_RESTARTING || m_AVND_SU_IS_RESTART(failed_su)) { m_AVND_SU_RESTART_RESET(failed_su); m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, failed_su, AVND_CKPT_SU_FLAG_CHANGE); } - /* delete curr info of the failed comp */ - rc = avnd_comp_curr_info_del(cb, failed_comp); - if (NCSCC_RC_SUCCESS != rc) - goto done; - /* In nodeswitchover context: a)If saAmfSUFailover is set for the faulted SU then this SU will be failed-over as a single entity. |