Menu

#3237 imm: amfd heartbeat timeout when multi partitioned clusters rejoin

5.20.11
fixed
None
defect
imm
-
major
False
2020-11-27
2020-11-16
Thuan Tran
No

When multiple partitioned clusters rejoin, AMFD gets a heartbeat timeout because it is stuck in an IMM operation.

Program terminated with signal SIGABRT, Aborted.
#0  0x00007fcd788f7cb9 in __GI___poll (fds=fds@entry=0x7ffc7c0f5bd8, nfds=nfds@entry=1, timeout=timeout@entry=60000) at ../sysdeps/unix/sysv/linux/poll.c:29
[Current thread is 1 (Thread 0x7fcd7a2bfc40 (LWP 247))]

Thread 1 (Thread 0x7fcd7a2bfc40 (LWP 247)):
#0  0x00007fcd788f7cb9 in __GI___poll (fds=fds@entry=0x7ffc7c0f5bd8, nfds=nfds@entry=1, timeout=timeout@entry=60000) at ../sysdeps/unix/sysv/linux/poll.c:29
        resultvar = 18446744073709551612
        sc_cancel_oldtype = 0
        sc_ret = <optimized out>
#1  0x00007fcd793d06ee in poll (__timeout=60000, __nfds=1, __fds=0x7ffc7c0f5bd8) at /usr/include/x86_64-linux-gnu/bits/poll2.h:46
No locals.
#2  osaf_ppoll (io_fds=0x7ffc7c0f5bd8, i_nfds=1, i_timeout_ts=i_timeout_ts@entry=0x7ffc7c0f5bb0, i_sigmask=i_sigmask@entry=0x0) at src/base/osaf_poll.c:108
        current_time = {tv_sec = 94486528873241, tv_nsec = 4307333024}
        elapsed_time = {tv_sec = 0, tv_nsec = 0}
        time_left = 60000
        start_time = {tv_sec = 335081, tv_nsec = 145963567}
        time_left_ts = <optimized out>
        result = <optimized out>
#3  0x00007fcd793d0898 in osaf_poll (io_fds=<optimized out>, i_nfds=<optimized out>, i_timeout=<optimized out>) at src/base/osaf_poll.c:46
        timeout_ts = {tv_sec = 60, tv_nsec = 0}
        i_timeout = <optimized out>
        i_nfds = <optimized out>
        io_fds = <optimized out>
        timeout_ts = <optimized out>
        timeout_ts = <optimized out>
#4  0x00007fcd793d08db in osaf_poll_one_fd (i_fd=37, i_timeout=60000) at src/base/osaf_poll.c:133
        set = {fd = 37, events = 1, revents = 0}
        result = <optimized out>
#5  0x00007fcd793ee55d in mds_mcm_time_wait (sel_obj=0x55ef5c04a238, time_val=6000) at src/mds/mds_c_sndrcv.c:3023
        errnum = <optimized out>
        count = <optimized out>
#6  0x00007fcd793f4ff9 in mcm_pvt_normal_svc_sndrsp (pri=<optimized out>, req=0x7ffc7c0f5c90, to_svc_id=25, to_dest=<optimized out>, msg=<optimized out>, fr_svc_id=27, env_hdl=131071) at src/mds/mds_c_sndrcv.c:2880
        xch_id = 1269
        sync_queue = 0x55ef5c04a230
        status = 1
        send_msg = {msg_type = 1 '\001', data = {msg = 0x7ffc7c0f5e20, info = {len = 24096, buff = 0x0}}, msg_fmt_ver = 0, rem_svc_sub_part_ver = 0 '\000', rem_svc_arch_word = 0 '\000', mds_bcast_list_hdr = 0x0, bcast_buff_len = 0}
        xch_id = <optimized out>
        sync_queue = <optimized out>
        status = <optimized out>
        send_msg = <optimized out>
#7  mds_mcm_send (info=0x7ffc7c0f5d40) at src/mds/mds_c_sndrcv.c:792
        status = 1
        req = {i_msg = 0x0, i_to_svc = 25, i_priority = 0, i_sendtype = MDS_SENDTYPE_SNDRSP, info = {snd = {i_to_dest = 0}, sndrsp = {i_to_dest = 0, i_time_to_wait = 6000, o_rsp = 0x0, buff = 0x0, len = 0, o_msg_fmt_ver = 0}, sndrack = {i_sender_dest = 0, i_time_to_wait = 6000, i_msg_ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}}, sndack = {i_to_dest = 0, i_time_to_wait = 6000}, rsp = {i_sender_dest = 0, i_msg_ctxt = {length = 112 'p', data = "\027\000\000\000\000\000\000\000\000\000\000"}}, red = {i_to_vdest = 0, i_to_anc = 6000}, redrsp = {i_to_vdest = 0, i_to_anc = 6000, i_time_to_wait = 0, o_rsp = 0x0, buff = 0x0, len = 0, o_msg_fmt_ver = 0}, redrack = {i_to_vdest = 0, i_to_anc = 6000, i_time_to_wait = 0, i_msg_ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}}, redack = {i_to_vdest = 0, i_to_anc = 6000, i_time_to_wait = 0}, rrsp = {i_to_dest = 0, i_to_anc = 6000, i_msg_ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}}, bcast = {i_bcast_scope = 0}, rbcast = {i_bcast_scope = 0}}}
        status = <optimized out>
        req = <optimized out>
#8  mds_send (info=info@entry=0x7ffc7c0f5d40) at src/mds/mds_c_sndrcv.c:458
        snd_type_major = <optimized out>
#9  0x00007fcd793fc3ac in ncsmds_api (svc_to_mds_info=svc_to_mds_info@entry=0x7ffc7c0f5d40) at src/mds/mds_papi.c:165
        status = <optimized out>
#10 0x00007fcd7984fd00 in imma_mds_msg_sync_send (imma_mds_hdl=<optimized out>, destination=<optimized out>, i_evt=<optimized out>, o_evt=0x7ffc7c0f6020, timeout=<optimized out>) at src/imm/agent/imma_mds.cc:597
        cb = <optimized out>
        mds_info = {i_mds_hdl = 131071, i_svc_id = 27, i_op = MDS_SEND, info = {svc_install = {i_yr_svc_hdl = 140722389868064, i_install_scope = 25, i_svc_cb = 0x1, o_dest = 567412424442051, o_anc = 6000, i_mds_q_ownership = false, o_sel_obj = {raise_obj = 0, rmv_obj = 0}, i_mds_svc_pvt_ver = 0 '\000', i_fail_no_active_sends = false, i_msg_loss_indication = false}, svc_uninstall = {i_msg_free_cb = 0x7ffc7c0f5e20}, svc_subscribe = {i_scope = 2081381920, i_num_svcs = 252 '\374', i_svc_ids = 0x200000019}, red_subscribe = {i_scope = 2081381920, i_num_svcs = 252 '\374', i_svc_ids = 0x200000019}, svc_cancel = {i_num_svcs = 32 ' ', i_svc_ids = 0x200000019}, svc_sys_subscribe = {i_evt_map = 2081381920}, svc_send = {i_msg = 0x7ffc7c0f5e20, i_to_svc = 25, i_priority = MDS_SEND_PRIORITY_MEDIUM, i_sendtype = MDS_SENDTYPE_SNDRSP, info = {snd = {i_to_dest = 567412424442051}, sndrsp = {i_to_dest = 567412424442051, i_time_to_wait = 6000, o_rsp = 0x0, buff = 0x0, len = 0, o_msg_fmt_ver = 0}, sndrack = {i_sender_dest = 567412424442051, i_time_to_wait = 6000, i_msg_ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}}, sndack = {i_to_dest = 567412424442051, i_time_to_wait = 6000}, rsp = {i_sender_dest = 567412424442051, i_msg_ctxt = {length = 112 'p', data = "\027\000\000\000\000\000\000\000\000\000\000"}}, red = {i_to_vdest = 567412424442051, i_to_anc = 6000}, redrsp = {i_to_vdest = 567412424442051, i_to_anc = 6000, i_time_to_wait = 0, o_rsp = 0x0, buff = 0x0, len = 0, o_msg_fmt_ver = 0}, redrack = {i_to_vdest = 567412424442051, i_to_anc = 6000, i_time_to_wait = 0, i_msg_ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}}, redack = {i_to_vdest = 567412424442051, i_to_anc = 6000, i_time_to_wait = 0}, rrsp = {i_to_dest = 567412424442051, i_to_anc = 6000, i_msg_ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}}, bcast = {i_bcast_scope = 195}, rbcast = {i_bcast_scope = 195}}}, svc_direct_send = {i_direct_buff = 0x7ffc7c0f5e20 "", i_direct_buff_len = 25, 
i_to_svc = 2, i_priority = MDS_SEND_PRIORITY_LOW, i_sendtype = MDS_SENDTYPE_SND, i_msg_fmt_ver = 195, info = {snd = {i_to_dest = 6000}, sndrsp = {i_to_dest = 6000, i_time_to_wait = 0, o_rsp = 0x0, buff = 0x0, len = 0, o_msg_fmt_ver = 0}, sndrack = {i_sender_dest = 6000, i_time_to_wait = 0, i_msg_ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}}, sndack = {i_to_dest = 6000, i_time_to_wait = 0}, rsp = {i_sender_dest = 6000, i_msg_ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}}, red = {i_to_vdest = 6000, i_to_anc = 0}, redrsp = {i_to_vdest = 6000, i_to_anc = 0, i_time_to_wait = 0, o_rsp = 0x0, buff = 0x0, len = 0, o_msg_fmt_ver = 0}, redrack = {i_to_vdest = 6000, i_to_anc = 0, i_time_to_wait = 0, i_msg_ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}}, redack = {i_to_vdest = 6000, i_to_anc = 0, i_time_to_wait = 0}, rrsp = {i_to_dest = 6000, i_to_anc = 0, i_msg_ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}}, bcast = {i_bcast_scope = 6000}, rbcast = {i_bcast_scope = 6000}}}, retrieve_msg = {i_dispatchFlags = 2081381920}, chg_role = {new_role = 2081381920}, query_dest = {i_dest = 140722389868064, i_svc_id = 25, i_query_for_role = 2, info = {query_for_anc = {i_vdest_rl = V_DEST_RL_ACTIVE, o_anc = 567412424442051}, query_for_role = {i_anc = 1, o_vdest_rl = 195}}, o_local = 112, o_node_id = 0, o_adest = 0}, query_pwe = {o_pwe_id = 24096, o_absolute = 15, info = {abs_info = {o_adest = 8589934617}, virt_info = {o_vdest = 8589934617, o_anc = 1, o_role = 195}}}, subscribe_node = {i_dummy = 2081381920}, unsubscribe_node = {i_dummy = 2081381920}}}
        rc = <optimized out>
#11 0x00007fcd7985bc7d in imma_evt_fake_evs (cb=0x7fcd79a69300 <imma_cb>, i_evt=<optimized out>, o_evt=0x7ffc7c0f6020, timeout=6000, immHandle=416611959823, locked=<optimized out>, checkWritable=true) at src/imm/agent/imma_proc.cc:3625
        rc = SA_AIS_OK
        fevs_evt = {next = 0x0, type = IMMSV_EVT_TYPE_IMMND, info = {imma = {type = 0, info = {initRsp = {immHandle = 14, error = SA_AIS_OK}, errRsp = {error = SA_AIS_ERR_EXIST, errStrings = 0x1}, admInitRsp = {error = SA_AIS_ERR_EXIST, ownerId = 0}, ccbInitRsp = {error = SA_AIS_ERR_EXIST, ccbId = 0}, searchInitRsp = {error = SA_AIS_ERR_EXIST, searchId = 0}, searchNextRsp = 0xe, searchBundleNextRsp = 0xe, searchRemote = {client_hdl = 14, requestNodeId = 1, remoteNodeId = 0, searchId = 0, objectName = {size = 132111, buf = 0x60 <error: Cannot access memory at address 0x60>}, attributeNames = 0x55ef5c0d0f4c}, admOpReq = {adminOwnerId = 14, invocation = 0, operationId = 1, continuationId = 0, timeout = 416611959823, objectName = {size = 96, buf = 0x55ef5c0d0f4c ""}, params = 0x0}, admOpRsp = {oi_client_hdl = 14, invocation = 1, result = 0, error = 0, parms = 0x610002040f}, objCreate = {ccbId = 14, adminOwnerId = 0, className = {size = 1, buf = 0x0}, parentOrObjectDn = {size = 132111, buf = 0x60 <error: Cannot access memory at address 0x60>}, attrValues = 0x55ef5c0d0f4c, immHandle = 0}, objDelete = {ccbId = 14, adminOwnerId = 0, objectName = {size = 1, buf = 0x0}, immHandle = 416611959823}, objModify = {ccbId = 14, adminOwnerId = 0, objectName = {size = 1, buf = 0x0}, attrMods = 0x610002040f, immHandle = 96}, ccbCompl = {ccbId = 14, implId = 0, invocation = 1, immHandle = 0}, classDescr = {className = {size = 14, buf = 0x1 <error: Cannot access memory at address 0x1>}, classCategory = 0, attrDefinitions = 0x610002040f}, implSetRsp = {error = SA_AIS_ERR_EXIST, implId = 0}, tmr_info = {type = 14, adm_owner_hdl = 1, client_hdl = 0, invocation = 416611959823}}}, immnd = {dont_free_me = false, unused1 = false, unused2 = false, unused3 = false, error = 0, type = IMMND_EVT_A2ND_IMM_FEVS, info = {initReq = {version = {releaseCode = 1 '\001', majorVersion = 0 '\000', minorVersion = 0 '\000'}, client_pid = 0}, finReq = {client_hdl = 1}, adminitReq = {client_hdl = 1, i = 
{adminOwnerName = {_opaque = {0, 0, 0, 0, 1039, 2, 97, 0, 96, 0, 0, 0, 3916, 23565, 21999, 0 <repeats 114 times>}}, releaseOwnershipOnFinalize = false}}, ccbinitReq = {adminOwnerId = 1, ccbFlags = 0, client_hdl = 416611959823}, implSet = {client_hdl = 1, impl_name = {size = 0, buf = 0x610002040f <error: Cannot access memory at address 0x610002040f>}, impl_id = 96, scope = 0, oi_timeout = 1544359756}, admFinReq = {adm_owner_id = 1}, admReq = {adm_owner_id = 1, scope = 0, objectNames = 0x0}, admOpReq = {adminOwnerId = 1, invocation = 0, operationId = 0, continuationId = 416611959823, timeout = 96, objectName = {size = 1544359756, buf = 0x0}, params = 0x0}, fevsReq = {sender_count = 1, reply_dest = 0, client_hdl = 416611959823, msg = {size = 96, buf = 0x55ef5c0d0f4c ""}, isObjSync = 0 '\000', ex_immd_node_id = 0}, admOpRsp = {oi_client_hdl = 1, invocation = 0, result = 132111, error = 97, parms = 0x60}, ccbUpcallRsp = {oi_client_hdl = 1, ccbId = 0, implId = 0, inv = 132111, result = 97, name = {_opaque = {96, 0, 0, 0, 3916, 23565, 21999, 0 <repeats 122 times>}}, errorString = {size = 0, buf = 0x0}}, classDescr = {className = {size = 1, buf = 0x0}, classCategory = 132111, attrDefinitions = 0x60}, objCreate = {ccbId = 1, adminOwnerId = 0, className = {size = 0, buf = 0x610002040f <error: Cannot access memory at address 0x610002040f>}, parentOrObjectDn = {size = 96, buf = 0x55ef5c0d0f4c ""}, attrValues = 0x0, immHandle = 0}, objModify = {ccbId = 1, adminOwnerId = 0, objectName = {size = 0, buf = 0x610002040f <error: Cannot access memory at address 0x610002040f>}, attrMods = 0x60, immHandle = 94486529904460}, objDelete = {ccbId = 1, adminOwnerId = 0, objectName = {size = 0, buf = 0x610002040f <error: Cannot access memory at address 0x610002040f>}, immHandle = 96}, obj_sync = {className = {size = 1, buf = 0x0}, objectName = {size = 132111, buf = 0x60 <error: Cannot access memory at address 0x60>}, attrValues = 0x55ef5c0d0f4c, next = 0x0}, finSync = {lastContinuationId = 1, 
adminOwners = 0x0, implementers = 0x610002040f, classes = 0x60, ccbResults = 0x55ef5c0d0f4c}, ccbId = 1, searchOp = {client_hdl = 1, searchId = 0}, searchInit = {client_hdl = 1, rootName = {size = 0, buf = 0x610002040f <error: Cannot access memory at address 0x610002040f>}, scope = 96, searchOptions = 94486529904460, searchParam = {present = 0, choice = {oneAttrParam = {attrName = {size = 0, buf = 0x0}, attrValueType = 0, attrValue = {val = {saint32 = 0, sauint32 = 0, saint64 = 0, sauint64 = 0, satime = 0, safloat = 0, sadouble = 0, x = {size = 0, buf = 0x0}}}}}}, attributeNames = 0x0, ccbId = 0}, rtAttUpdRpl = {sr = {client_hdl = 1, requestNodeId = 0, remoteNodeId = 0, searchId = 132111, objectName = {size = 96, buf = 0x55ef5c0d0f4c ""}, attributeNames = 0x0}, result = 0}, searchRemote = {client_hdl = 1, requestNodeId = 0, remoteNodeId = 0, searchId = 132111, objectName = {size = 96, buf = 0x55ef5c0d0f4c ""}, attributeNames = 0x0}, rspSrchRmte = {result = SA_AIS_OK, requestNodeId = 0, remoteNodeId = 0, searchId = 0, runtimeAttrs = {objectName = {size = 132111, buf = 0x60 <error: Cannot access memory at address 0x60>}, attrValuesList = 0x55ef5c0d0f4c}}, ctrl = {nodeId = 1, rulingEpoch = 0, fevsMsgStart = 0, ndExecPid = 132111, canBeCoord = 97, isCoord = 96 '`', syncStarted = 0 '\000', nodeEpoch = 0, pbeEnabled = 76 'L', dir = {size = 0, buf = 0x0}, xmlFile = {size = 0, buf = 0x0}, pbeFile = {size = 0, buf = 0x0}, ex_immd_node_id = 0}, adminitGlobal = {globalOwnerId = 1, i = {adminOwnerName = {_opaque = {0, 0, 0, 0, 0, 0, 1039, 2, 97, 0, 96, 0, 0, 0, 3916, 23565, 21999, 0 <repeats 112 times>}}, releaseOwnershipOnFinalize = false}}, ccbinitGlobal = {globalCcbId = 1, i = {adminOwnerId = 0, ccbFlags = 416611959823, client_hdl = 96}}, mds_info = {change = NCSMDS_NO_ACTIVE, dest = 0, svc_id = 132111, node_id = 97, role = 96}, syncFevsBase = 1, impl_delete = {size = 1, implNameList = 0x0}}}, immd = {type = 0, info = {ctrl_msg = {ndExecPid = 14, epoch = 0, refresh = 1 
'\001', pbeEnabled = 0 '\000', dir = {size = 0, buf = 0x610002040f <error: Cannot access memory at address 0x610002040f>}, xmlFile = {size = 96, buf = 0x55ef5c0d0f4c ""}, pbeFile = {size = 0, buf = 0x0}, fevs_count = 0, admo_id_count = 0, ccb_id_count = 0, impl_count = 0, ex_immd_node_id = 0}, admown_init = {client_hdl = 14, i = {adminOwnerName = {_opaque = {1, 0, 0, 0, 0, 0, 0, 0, 1039, 2, 97, 0, 96, 0, 0, 0, 3916, 23565, 21999, 0 <repeats 110 times>}}, releaseOwnershipOnFinalize = false}}, ccb_init = {adminOwnerId = 14, ccbFlags = 1, client_hdl = 0}, impl_set = {r = {client_hdl = 14, impl_name = {size = 1, buf = 0x0}, impl_id = 132111, scope = 97, oi_timeout = 96}, reply_dest = 94486529904460}, objModify = {ccbId = 14, adminOwnerId = 0, objectName = {size = 1, buf = 0x0}, attrMods = 0x610002040f, immHandle = 96}, ccbId = 14, admoId = 14, fevsReq = {sender_count = 14, reply_dest = 1, client_hdl = 0, msg = {size = 132111, buf = 0x60 <error: Cannot access memory at address 0x60>}, isObjSync = 76 'L', ex_immd_node_id = 21999}, tmr_info = {type = 14, info = {immnd_dest = 1}}, mds_info = {change = 14, dest = 1, svc_id = 0, node_id = 0, role = 132111}, rda_info = {io_role = 14}, syncFevsBase = {fevsBase = 14, client_hdl = 1}, pbe2 = {epoch = 14, maxCcbId = 0, maxCommitTime = 1, maxWeakCcbId = 0, maxWeakCommitTime = 132111}, impl_delete = {size = 14, implNameList = 0x1}}}}, sinfo = {to_svc = 0, dest = 0, node_id = 0, stype = MDS_SENDTYPE_SND, ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}, mSynReqCount = 0 '\000', pid = 0, uid = 0, gid = 0}}
        proc_rc = <optimized out>
        tmpData = 0x55ef5c09ec50 "\320\064\b\\\357U"
        data = <optimized out>
        uba = {start = 0x55ef5c095a00, ub = 0x55ef5c095a00, bufp = 0x55ef5c0d0f5c "", res = 0, ttl = 96, max = 2081381920}
        size = 96
        __FUNCTION__ = "imma_evt_fake_evs"
#12 0x00007fcd79851885 in rt_object_delete_common (immOiHandle=<optimized out>, objectName=0x55ef5c06ad90 "safSISU=safSu=SC-5\\,safSg=NoRed\\,safApp=OpenSAF,safSi=NoRed4,safApp=OpenSAF", isObjectDnUsed=isObjectDnUsed@entry=true) at src/imm/agent/imma_oi_api.cc:3372
        rc = SA_AIS_OK
        cb = <optimized out>
        evt = {next = 0x0, type = IMMSV_EVT_TYPE_IMMND, info = {imma = {type = 0, info = {initRsp = {immHandle = 39, error = 0}, errRsp = {error = SA_AIS_ERR_NOT_READY, errStrings = 0xe00000000}, admInitRsp = {error = SA_AIS_ERR_NOT_READY, ownerId = 0}, ccbInitRsp = {error = SA_AIS_ERR_NOT_READY, ccbId = 0}, searchInitRsp = {error = SA_AIS_ERR_NOT_READY, searchId = 0}, searchNextRsp = 0x27, searchBundleNextRsp = 0x27, searchRemote = {client_hdl = 39, requestNodeId = 0, remoteNodeId = 14, searchId = 76, objectName = {size = 1543941520, buf = 0x0}, attributeNames = 0x0}, admOpReq = {adminOwnerId = 39, invocation = 0, operationId = 60129542144, continuationId = 76, timeout = 94486529486224, objectName = {size = 0, buf = 0x0}, params = 0x0}, admOpRsp = {oi_client_hdl = 39, invocation = 60129542144, result = 76, error = 0, parms = 0x55ef5c06ad90}, objCreate = {ccbId = 39, adminOwnerId = 0, className = {size = 0, buf = 0x4c <error: Cannot access memory at address 0x4c>}, parentOrObjectDn = {size = 1543941520, buf = 0x0}, attrValues = 0x0, immHandle = 0}, objDelete = {ccbId = 39, adminOwnerId = 0, objectName = {size = 0, buf = 0x4c <error: Cannot access memory at address 0x4c>}, immHandle = 94486529486224}, objModify = {ccbId = 39, adminOwnerId = 0, objectName = {size = 0, buf = 0x4c <error: Cannot access memory at address 0x4c>}, attrMods = 0x55ef5c06ad90, immHandle = 0}, ccbCompl = {ccbId = 39, implId = 0, invocation = 0, immHandle = 76}, classDescr = {className = {size = 39, buf = 0xe00000000 <error: Cannot access memory at address 0xe00000000>}, classCategory = 76, attrDefinitions = 0x55ef5c06ad90}, implSetRsp = {error = SA_AIS_ERR_NOT_READY, implId = 0}, tmr_info = {type = 39, adm_owner_hdl = 60129542144, client_hdl = 76, invocation = 94486529486224}}}, immnd = {dont_free_me = false, unused1 = false, unused2 = false, unused3 = false, error = 0, type = IMMND_EVT_A2ND_OI_OBJ_DELETE, info = {initReq = {version = {releaseCode = 0 '\000', majorVersion = 0 '\000', 
minorVersion = 0 '\000'}, client_pid = 14}, finReq = {client_hdl = 60129542144}, adminitReq = {client_hdl = 60129542144, i = {adminOwnerName = {_opaque = {76, 0, 0, 0, 44432, 23558, 21999, 0 <repeats 122 times>}}, releaseOwnershipOnFinalize = false}}, ccbinitReq = {adminOwnerId = 0, ccbFlags = 76, client_hdl = 94486529486224}, implSet = {client_hdl = 60129542144, impl_name = {size = 76, buf = 0x55ef5c06ad90 "safSISU=safSu=SC-5\\,safSg=NoRed\\,safApp=OpenSAF,safSi=NoRed4,safApp=OpenSAF"}, impl_id = 0, scope = 0, oi_timeout = 0}, admFinReq = {adm_owner_id = 0}, admReq = {adm_owner_id = 0, scope = 14, objectNames = 0x4c}, admOpReq = {adminOwnerId = 0, invocation = 14, operationId = 76, continuationId = 94486529486224, timeout = 0, objectName = {size = 0, buf = 0x0}, params = 0x0}, fevsReq = {sender_count = 60129542144, reply_dest = 76, client_hdl = 94486529486224, msg = {size = 0, buf = 0x0}, isObjSync = 0 '\000', ex_immd_node_id = 0}, admOpRsp = {oi_client_hdl = 60129542144, invocation = 76, result = 1543941520, error = 21999, parms = 0x0}, ccbUpcallRsp = {oi_client_hdl = 60129542144, ccbId = 76, implId = 0, inv = 1543941520, result = 21999, name = {_opaque = {0 <repeats 129 times>}}, errorString = {size = 0, buf = 0x0}}, classDescr = {className = {size = 0, buf = 0x4c <error: Cannot access memory at address 0x4c>}, classCategory = 1543941520, attrDefinitions = 0x0}, objCreate = {ccbId = 0, adminOwnerId = 14, className = {size = 76, buf = 0x55ef5c06ad90 "safSISU=safSu=SC-5\\,safSg=NoRed\\,safApp=OpenSAF,safSi=NoRed4,safApp=OpenSAF"}, parentOrObjectDn = {size = 0, buf = 0x0}, attrValues = 0x0, immHandle = 0}, objModify = {ccbId = 0, adminOwnerId = 14, objectName = {size = 76, buf = 0x55ef5c06ad90 "safSISU=safSu=SC-5\\,safSg=NoRed\\,safApp=OpenSAF,safSi=NoRed4,safApp=OpenSAF"}, attrMods = 0x0, immHandle = 0}, objDelete = {ccbId = 0, adminOwnerId = 14, objectName = {size = 76, buf = 0x55ef5c06ad90 
"safSISU=safSu=SC-5\\,safSg=NoRed\\,safApp=OpenSAF,safSi=NoRed4,safApp=OpenSAF"}, immHandle = 0}, obj_sync = {className = {size = 0, buf = 0x4c <error: Cannot access memory at address 0x4c>}, objectName = {size = 1543941520, buf = 0x0}, attrValues = 0x0, next = 0x0}, finSync = {lastContinuationId = 0, adminOwners = 0x4c, implementers = 0x55ef5c06ad90, classes = 0x0, ccbResults = 0x0}, ccbId = 0, searchOp = {client_hdl = 60129542144, searchId = 76}, searchInit = {client_hdl = 60129542144, rootName = {size = 76, buf = 0x55ef5c06ad90 "safSISU=safSu=SC-5\\,safSg=NoRed\\,safApp=OpenSAF,safSi=NoRed4,safApp=OpenSAF"}, scope = 0, searchOptions = 0, searchParam = {present = 0, choice = {oneAttrParam = {attrName = {size = 0, buf = 0x0}, attrValueType = 0, attrValue = {val = {saint32 = 0, sauint32 = 0, saint64 = 0, sauint64 = 0, satime = 0, safloat = 0, sadouble = 0, x = {size = 0, buf = 0x0}}}}}}, attributeNames = 0x0, ccbId = 0}, rtAttUpdRpl = {sr = {client_hdl = 60129542144, requestNodeId = 76, remoteNodeId = 0, searchId = 1543941520, objectName = {size = 0, buf = 0x0}, attributeNames = 0x0}, result = 0}, searchRemote = {client_hdl = 60129542144, requestNodeId = 76, remoteNodeId = 0, searchId = 1543941520, objectName = {size = 0, buf = 0x0}, attributeNames = 0x0}, rspSrchRmte = {result = 0, requestNodeId = 14, remoteNodeId = 76, searchId = 0, runtimeAttrs = {objectName = {size = 1543941520, buf = 0x0}, attrValuesList = 0x0}}, ctrl = {nodeId = 0, rulingEpoch = 14, fevsMsgStart = 76, ndExecPid = 1543941520, canBeCoord = 21999, isCoord = 0 '\000', syncStarted = 0 '\000', nodeEpoch = 0, pbeEnabled = 0 '\000', dir = {size = 0, buf = 0x0}, xmlFile = {size = 0, buf = 0x0}, pbeFile = {size = 0, buf = 0x0}, ex_immd_node_id = 0}, adminitGlobal = {globalOwnerId = 0, i = {adminOwnerName = {_opaque = {14, 0, 76, 0, 0, 0, 44432, 23558, 21999, 0 <repeats 120 times>}}, releaseOwnershipOnFinalize = false}}, ccbinitGlobal = {globalCcbId = 0, i = {adminOwnerId = 76, ccbFlags = 
94486529486224, client_hdl = 0}}, mds_info = {change = NCSMDS_NONE, dest = 76, svc_id = 1543941520, node_id = 21999, role = 0}, syncFevsBase = 60129542144, impl_delete = {size = 0, implNameList = 0x4c}}}, immd = {type = 0, info = {ctrl_msg = {ndExecPid = 39, epoch = 0, refresh = 0 '\000', pbeEnabled = 0 '\000', dir = {size = 76, buf = 0x55ef5c06ad90 "safSISU=safSu=SC-5\\,safSg=NoRed\\,safApp=OpenSAF,safSi=NoRed4,safApp=OpenSAF"}, xmlFile = {size = 0, buf = 0x0}, pbeFile = {size = 0, buf = 0x0}, fevs_count = 0, admo_id_count = 0, ccb_id_count = 0, impl_count = 0, ex_immd_node_id = 0}, admown_init = {client_hdl = 39, i = {adminOwnerName = {_opaque = {0, 0, 14, 0, 76, 0, 0, 0, 44432, 23558, 21999, 0 <repeats 118 times>}}, releaseOwnershipOnFinalize = false}}, ccb_init = {adminOwnerId = 39, ccbFlags = 60129542144, client_hdl = 76}, impl_set = {r = {client_hdl = 39, impl_name = {size = 0, buf = 0x4c <error: Cannot access memory at address 0x4c>}, impl_id = 1543941520, scope = 21999, oi_timeout = 0}, reply_dest = 0}, objModify = {ccbId = 39, adminOwnerId = 0, objectName = {size = 0, buf = 0x4c <error: Cannot access memory at address 0x4c>}, attrMods = 0x55ef5c06ad90, immHandle = 0}, ccbId = 39, admoId = 39, fevsReq = {sender_count = 39, reply_dest = 60129542144, client_hdl = 76, msg = {size = 1543941520, buf = 0x0}, isObjSync = 0 '\000', ex_immd_node_id = 0}, tmr_info = {type = 39, info = {immnd_dest = 60129542144}}, mds_info = {change = 39, dest = 60129542144, svc_id = 76, node_id = 0, role = 1543941520}, rda_info = {io_role = 39}, syncFevsBase = {fevsBase = 39, client_hdl = 60129542144}, pbe2 = {epoch = 39, maxCcbId = 0, maxCommitTime = 0, maxWeakCcbId = 76, maxWeakCommitTime = 1543941520}, impl_delete = {size = 39, implNameList = 0xe00000000}}}}, sinfo = {to_svc = 0, dest = 0, node_id = 0, stype = MDS_SENDTYPE_SND, ctxt = {length = 0 '\000', data = '\000' <repeats 11 times>}, mSynReqCount = 0 '\000', pid = 0, uid = 0, gid = 0}}
        out_evt = 0x0
        cl_node = 0x55ef5bffb410
        locked = false
        __FUNCTION__ = "rt_object_delete_common"
#13 0x00007fcd7985629a in saImmOiRtObjectDelete_o3 (immOiHandle=<optimized out>, objectName=<optimized out>) at src/imm/agent/imma_oi_api.cc:3250
No locals.
#14 0x000055ef5b434e26 in avd_saImmOiRtObjectDelete_sync (dn="safSISU=safSu=SC-5\\,safSg=NoRed\\,safApp=OpenSAF,safSi=NoRed4,safApp=OpenSAF") at src/amf/amfd/imm.cc:1893
        t_ = {trace_leave_called = false, file_ = 0x55ef5b4be148 "src/amf/amfd/imm.cc", function_ = 0x55ef5b4bf240 <avd_saImmOiRtObjectDelete_sync(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::__FUNCTION__> "avd_saImmOiRtObjectDelete_sync"}
        __FUNCTION__ = "avd_saImmOiRtObjectDelete_sync"
        rc = SA_AIS_OK
        isImmReady = <optimized out>

Related

Wiki: ChangeLog-5.20.11

Discussion

  • Thuan Tran

    Thuan Tran - 2020-11-17
    • status: assigned --> review
     
  • Thuan Tran

    Thuan Tran - 2020-11-27
    • status: review --> fixed
     
  • Thuan Tran

    Thuan Tran - 2020-11-27

    commit 186349b64e20220a161f6fba47ccfc02f70165bf (HEAD -> develop, origin/develop)
    Author: thuan.tran thuan.tran@dektech.com.au
    Date: Wed Nov 11 11:47:32 2020 +0700

    imm: fix amfd stuck when multi partitioned clusters rejoin [#3237]
    
    - IMMND coordinator takes longer to sync because it incorrectly
    postpones the sync to wait for an incorrect number of down nodes.
    - IMMND should restart after being accepted re-intro and not be
    a new coordinator to sync again with new coordinator.
    - Active IMMD only update ex-IMMD from coordinator if info exist.
    Update ex-IMMD to node id itself when new coord announce sync.
    - Update #3228 solution: active IMMD should not drop re-intro
    from local IMMND; dropping it causes an unexpected IMMND coord to be
    selected and the local IMMND to restart unexpectedly later.
    - IMMND on the active IMMD node will start the split-brain detected
    timer to reboot the node if it sees another active IMMD, instead of
    rebooting immediately, to avoid messing up the RDE split-brain
    detection mechanism.
    - Quick reboot is sometimes not quick enough, so the active IMMD on
    the node may impact the newly promoted active node. Stop AMFND and
    kill AMFD/IMMD to avoid any impact.
    
     

Log in to post a comment.