Menu

#3345 amf: amfd crash during remove amf model

5.24.02
fixed
None
defect
amf
d
major
False
2024-01-05
2023-12-26
No

Steps to reproduce using amf-demo:

  • Load model
    immcfg -f AppConfig-2N.xml

  • Start app

immfind | grep -E "^safSu=SU" | xargs -I '{}' amf-adm unlock-in '{}'
immfind | grep -E "^safSu=SU" | xargs -I '{}' amf-adm unlock '{}'
  • Kill osafimmnd and amf-demo on the standby node
    pkill -9 osafimmnd; pkill -9 amf_demo

  • Stop app

immfind | grep -E "^safSu=SU" | xargs -I '{}' amf-adm lock '{}'
immfind | grep -E "^safSu=SU" | xargs -I '{}' amf-adm lock-in '{}'
  • Remove app
    immfind -c SaAmfApplication | grep AmfDemo | xargs -I '{}' immcfg -d '{}'

A coredump occurs:
2023-12-26 10:42:42.648 SC-2 osafamfd[278]: src/amf/amfd/app.cc:333: app_ccb_apply_cb: Assertion 'app->list_of_sg == nullptr' failed.

amfd backtrace:

[New LWP 279]
[New LWP 281]
[New LWP 282]
[New LWP 283]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `/usr/local/lib/opensaf/osafamfd'.
Program terminated with signal SIGABRT, Aborted.
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
[Current thread is 1 (Thread 0x7f473f6b3f80 (LWP 279))]

Thread 4 (Thread 0x7f473f61db00 (LWP 283)):
#0  0x00007f473f9a7bbf in __GI___poll (fds=fds@entry=0x7f473f61d1c8, nfds=nfds@entry=1, timeout=timeout@entry=30000) at ../sysdeps/unix/sysv/linux/poll.c:29
        resultvar = 18446744073709551100
        sc_cancel_oldtype = 0
        sc_ret = <optimized out>
#1  0x00007f473fceb8d7 in poll (__timeout=30000, __nfds=1, __fds=0x7f473f61d1c8) at /usr/include/x86_64-linux-gnu/bits/poll2.h:46
No locals.
#2  osaf_ppoll (io_fds=0x7f473f61d1c8, i_nfds=1, i_timeout_ts=0x7f473f61d1a0, i_sigmask=<optimized out>) at src/base/osaf_poll.c:108
        current_time = {tv_sec = 12, tv_nsec = -13510792149241856}
        elapsed_time = {tv_sec = 0, tv_nsec = 0}
        time_left = 30000
        start_time = {tv_sec = 94721, tv_nsec = 884318150}
        time_left_ts = <optimized out>
        result = <optimized out>
#3  0x00007f473fceb9bc in osaf_poll (io_fds=<optimized out>, i_nfds=<optimized out>, i_timeout=<optimized out>) at src/base/osaf_poll.c:46
        timeout_ts = {tv_sec = 30, tv_nsec = 0}
#4  0x00007f473fceb9ff in osaf_poll_one_fd (i_fd=i_fd@entry=25, i_timeout=i_timeout@entry=30000) at src/base/osaf_poll.c:133
        set = {fd = 25, events = 1, revents = 0}
        result = <optimized out>
#5  0x00007f473fd23203 in rda_read_msg (sockfd=25, msg=msg@entry=0x7f473f61d230 "10 2", size=64) at src/rde/agent/rda_papi.cc:691
        rc = <optimized out>
        msg_size = 0
#6  0x00007f473fd23762 in rda_callback_task (rda_callback_cb=0x5631cdf23b60) at src/rde/agent/rda_papi.cc:164
        msg = "10 2", '\000' <repeats 59 times>
        rc = <optimized out>
        value = 2
        retry_count = 0
        conn_lost = false
        cmd_type = RDE_RDA_HA_ROLE
        cb_info = {cb_type = PCS_RDA_ROLE_CHG_IND, info = {io_role = PCS_RDA_STANDBY}}
        role = PCS_RDA_UNDEFINED
#7  0x00007f473fa8f609 in start_thread (arg=<optimized out>) at pthread_create.c:477
        ret = <optimized out>
        pd = <optimized out>
        unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139943982783232, 6782476873984862877, 140734800747550, 140734800747551, 140734800747712, 139943982781312, -6868468769305695587, -6868467058807838051}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
        not_first_call = 0
#8  0x00007f473f9b4353 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
No locals.

Thread 3 (Thread 0x7f473f63eb00 (LWP 282)):
#0  0x00007f473f9a7bbf in __GI___poll (fds=fds@entry=0x7f473f63e220, nfds=nfds@entry=4, timeout=timeout@entry=20000) at ../sysdeps/unix/sysv/linux/poll.c:29
        resultvar = 18446744073709551100
        sc_cancel_oldtype = 0
        sc_ret = <optimized out>
#1  0x00007f473fd1ad77 in poll (__timeout=20000, __nfds=4, __fds=0x7f473f63e220) at /usr/include/x86_64-linux-gnu/bits/poll2.h:46
No locals.
#2  mdtm_process_recv_events () at src/mds/mds_dt_tipc.c:844
        pollres = <optimized out>
        pfd = {{fd = 18, events = 1, revents = 0}, {fd = 19, events = 1, revents = 0}, {fd = 21, events = 1, revents = 0}, {fd = -1, events = 1, revents = 0}}
        event = {event = 0, found_lower = 0, found_upper = 0, port = {ref = 0, node = 0}, s = {seq = {type = 0, lower = 0, upper = 0}, timeout = 0, filter = 0, usr_handle = "\000\000\000\000\000\000\000"}}
        timerfd = -1
        __FUNCTION__ = "mdtm_process_recv_events"
#3  0x00007f473fa8f609 in start_thread (arg=<optimized out>) at pthread_create.c:477
        ret = <optimized out>
        pd = <optimized out>
        unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139943982918400, 6782476873984862877, 140734800746766, 140734800746767, 140734800746928, 139943982916480, -6868468750515213667, -6868467058807838051}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
        not_first_call = 0
#4  0x00007f473f9b4353 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
No locals.

Thread 2 (Thread 0x7f473f65fb00 (LWP 281)):
#0  0x00007f473f9a7bbf in __GI___poll (fds=fds@entry=0x7f473f65f278, nfds=nfds@entry=1, timeout=timeout@entry=-1) at ../sysdeps/unix/sysv/linux/poll.c:29
        resultvar = 18446744073709551100
        sc_cancel_oldtype = 0
        sc_ret = <optimized out>
#1  0x00007f473fceb75e in poll (__timeout=-1, __nfds=1, __fds=0x7f473f65f278) at /usr/include/x86_64-linux-gnu/bits/poll2.h:46
No locals.
#2  osaf_poll_no_timeout (io_fds=0x7f473f65f278, i_nfds=1) at src/base/osaf_poll.c:31
        result = <optimized out>
#3  0x00007f473fceb9ff in osaf_poll_one_fd (i_fd=17, i_timeout=i_timeout@entry=-1) at src/base/osaf_poll.c:133
        set = {fd = 17, events = 1, revents = 0}
        result = <optimized out>
#4  0x00007f473fce8a27 in (anonymous namespace)::ncs_tmr_wait (arg=0x5631cdf2ca70) at ./src/base/timer/timer_handle.h:76
        handle = 0x5631cdf2ca70
        __FUNCTION__ = "ncs_tmr_wait"
#5  0x00007f473fa8f609 in start_thread (arg=<optimized out>) at pthread_create.c:477
        ret = <optimized out>
        pd = <optimized out>
        unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139943983053568, 6782476873984862877, 140734800746830, 140734800746831, 140734800746992, 139943983051648, -6868468802591692131, -6868467058807838051}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
        not_first_call = 0
#6  0x00007f473f9b4353 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
No locals.

Thread 1 (Thread 0x7f473f6b3f80 (LWP 279)):
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
        set = {__val = {0, 7234307576302018599, 0 <repeats 14 times>}}
        pid = <optimized out>
        tid = <optimized out>
        ret = <optimized out>
#1  0x00007f473f8b7859 in __GI_abort () at abort.c:79
        save_stage = 1
        act = {__sigaction_handler = {sa_handler = 0x0, sa_sigaction = 0x0}, sa_mask = {__val = {0 <repeats 15 times>, 140734800746608}}, sa_flags = -839762424, sa_restorer = 0x7fff5fce5820}
        sigs = {__val = {32, 0 <repeats 15 times>}}
#2  0x00007f473fcf03bf in __osafassert_fail (__file=__file@entry=0x5631cd933234 "src/amf/amfd/app.cc", __line=__line@entry=335, __func=__func@entry=0x5631cd933434 "app_ccb_apply_cb", __assertion=__assertion@entry=0x5631cd933461 "app->list_of_sg == nullptr") at src/base/sysf_def.c:296
No locals.
#3  0x00005631cd857615 in app_ccb_apply_cb (opdata=0x5631cdfbe300) at src/amf/amfd/app.cc:335
        app = 0x7f4734002800
        t_ = {trace_leave_called = false, file_ = 0x5631cd933234 "src/amf/amfd/app.cc", function_ = 0x5631cd933434 "app_ccb_apply_cb"}
        __FUNCTION__ = "app_ccb_apply_cb"
        __PRETTY_FUNCTION__ = "void app_ccb_apply_cb(CcbUtilOperationData_t*)"
#4  0x00005631cd8a3826 in ccb_apply_cb (immoi_handle=<optimized out>, ccb_id=<optimized out>) at src/amf/amfd/imm.cc:1265
        ccb_util_ccb_data = <optimized out>
        opdata = 0x0
        type = <optimized out>
        next = 0x5631cdf8b680
        temp = <optimized out>
        t_ = {trace_leave_called = false, file_ = 0x5631cd93bd60 "src/amf/amfd/imm.cc", function_ = 0x5631cd93b12c "ccb_apply_cb"}
        __FUNCTION__ = "ccb_apply_cb"
#5  0x00007f473fd7f63c in imma_process_callback_info (cb=0x7f473fd8e320 <imma_cb>, cl_node=<optimized out>, callback=0x7f473401d5d0, immHandle=<optimized out>) at src/imm/agent/imma_proc.cc:2539
        ccbid = 6
        privateAugOmHandle = 0
        t_ = {trace_leave_called = false, file_ = 0x7f473fd8985d "src/imm/agent/imma_proc.cc", function_ = 0x7f473fd89b68 "imma_process_callback_info"}
        __FUNCTION__ = "imma_process_callback_info"
        clientCapable = true
        isPbeOp = false
        isExtendedNameValid = false
        isAttrExtendedName = false
#6  0x00007f473fd81ab1 in imma_hdl_callbk_dispatch_all (cb=0x7f473fd8e320 <imma_cb>, immHandle=<optimized out>) at src/imm/agent/imma_proc.cc:1868
        callback = 0x7f473401d5d0
        cl_node = 0x5631cdf27020
#7  0x00007f473fd782f7 in saImmOiDispatch (immOiHandle=<optimized out>, dispatchFlags=SA_DISPATCH_ALL) at src/imm/agent/imma_oi_api.cc:642
        rc = SA_AIS_OK
        cb = <optimized out>
        cl_node = 0x0
        locked = false
        pend_fin = 0
        pend_dis = 0
        t_ = {trace_leave_called = false, file_ = 0x7f473fd855a4 "src/imm/agent/imma_oi_api.cc", function_ = 0x7f473fd8589b "saImmOiDispatch"}
        __FUNCTION__ = "saImmOiDispatch"
#8  0x00005631cd8a59eb in main_loop () at src/amf/amfd/main.cc:746
        pollretval = <optimized out>
        cb = 0x5631cd975dc0 <_control_block>
        evt = <optimized out>
        mbx_fd = <optimized out>
        error = <optimized out>
        old_sync_state = AVD_STBY_IN_SYNC
        polltmo = <optimized out>
        term_fd = 27
        hangup_fd = 29
        hangup_sel_obj = 0x7f473fd504f8 <hangup_sel_obj>
        __FUNCTION__ = <optimized out>
#9  0x00005631cd854fba in main (argc=<optimized out>, argv=<optimized out>) at src/amf/amfd/main.cc:883
No locals.
50  ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.

Related

Wiki: ChangeLog-5.24.02

Discussion

  • Thien Minh Huynh

    • status: accepted --> review
     
  • Thien Minh Huynh

    • status: review --> fixed
     
  • Thien Minh Huynh

    commit 3463fa7734dab0b00365e6665eee4f896ed6268e (HEAD -> develop, origin/develop, ticket-3345)
    Author: thien.m.huynh thien.m.huynh@dektech.com.au
    Date: Fri Dec 29 13:14:58 2023 +0700

    amf: check the existing SG before creating a new one [#3345]
    
    When IMM is restarted, AMF is re-initialized.
    AMF then adds duplicate AMF SG entities to the local database.
    This causes a crash when trying to delete an App object.
    
    The solution is to check for an existing SG before creating a new one.
    
     

Log in to post a comment.