Reproduce:
- Increase logMaxPendingWriteRequests to increase the resilience of the log service
- Make NFS hang
- Make the standby go out of sync
- Restore NFS to normal operation
- Trigger switch-over
- Expect osaflogd on the standby to core dump when it receives the peer checkpoint
Backtrace:
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `/usr/local/lib/opensaf/osaflogd'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0 __gnu_cxx::__atomic_add (__val=1, __mem=0x469) at /usr/include/c++/9/ext/atomicity.h:96
96 /usr/include/c++/9/ext/atomicity.h: No such file or directory.
[Current thread is 1 (Thread 0x7fb7e18e2bc0 (LWP 252))]
(gdb) bt
#0 __gnu_cxx::__atomic_add (__val=1, __mem=0x469) at /usr/include/c++/9/ext/atomicity.h:96
#1 __gnu_cxx::__atomic_add_dispatch (__val=1, __mem=0x469) at /usr/include/c++/9/ext/atomicity.h:96
#2 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_add_ref_copy (this=0x461) at /usr/include/c++/9/bits/shared_ptr_base.h:139
#3 std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count (__r=..., this=<synthetic pointer>) at /usr/include/c++/9/bits/shared_ptr_base.h:737
#4 std::__shared_ptr<Cache::Data, (__gnu_cxx::_Lock_policy)2>::__shared_ptr (this=<synthetic pointer>) at /usr/include/c++/9/bits/shared_ptr_base.h:1167
#5 std::shared_ptr<Cache::Data>::shared_ptr (this=<synthetic pointer>) at /usr/include/c++/9/bits/shared_ptr.h:129
#6 Cache::Front (this=0x55d83478c7c0 <Cache::instance()::cache>) at ./src/log/logd/lgs_cache.h:230
#7 ckpt_proc_pop_write_async (cb=<optimized out>, data=0x7ffc9afc7270) at src/log/logd/lgs_mbcsv_v8.cc:229
#8 0x000055d83474c584 in ckpt_decode_log_struct (cb=cb@entry=0x55d83478cc20 <_lgs_cb>, cbk_arg=cbk_arg@entry=0x7ffc9afc7390, ckpt_msg=ckpt_msg@entry=0x7ffc9afc7270, struct_ptr=<optimized out>,
struct_ptr@entry=0x7ffc9afc7280,
edp_function=edp_function@entry=0x55d83476d860 <EncodeDecodePopAndWriteAsync(edu_hdl_tag*, edu_tkn_tag*, void*, unsigned int*, edu_buf_env_tag*, EDP_OP_TYPE, EDU_ERR*)>)
at src/log/logd/lgs_mbcsv.cc:1206
#9 0x000055d83476dc3a in DecodePopAndWriteAsync (cb=cb@entry=0x55d83478cc20 <_lgs_cb>, ckpt_msg=ckpt_msg@entry=0x7ffc9afc7270, cbk_arg=cbk_arg@entry=0x7ffc9afc7390) at src/log/logd/lgs_mbcsv_v8.cc:185
#10 0x000055d83474c831 in ckpt_decode_async_update (cb=0x55d83478cc20 <_lgs_cb>, cbk_arg=0x7ffc9afc7390) at src/log/logd/lgs_mbcsv.cc:1619
#11 0x000055d83474e9ae in ckpt_decode_cbk_handler (cbk_arg=0x7ffc9afc7390) at src/log/logd/lgs_mbcsv.cc:1136
#12 mbcsv_callback (arg=0x7ffc9afc7390) at src/log/logd/lgs_mbcsv.cc:636
#13 0x00007fb7e1eef44a in ncs_mbscv_rcv_decode (peer=peer@entry=0x55d8352ecbd0, evt=evt@entry=0x7fb7d40069c0) at src/mbc/mbcsv_act.c:409
#14 0x00007fb7e1eef5e6 in ncs_mbcsv_rcv_async_update (peer=0x55d8352ecbd0, evt=0x7fb7d40069c0) at src/mbc/mbcsv_act.c:460
#15 0x00007fb7e1ef6485 in mbcsv_process_events (rcvd_evt=0x7fb7d40069c0, mbcsv_hdl=mbcsv_hdl@entry=4293918753) at src/mbc/mbcsv_pr_evts.c:166
#16 0x00007fb7e1ef65f7 in mbcsv_hdl_dispatch_all (mbcsv_hdl=4293918753, mbx=<optimized out>, mbx@entry=4288675841) at src/mbc/mbcsv_pr_evts.c:271
#17 0x00007fb7e1ef0b2a in mbcsv_process_dispatch_request (arg=0x7ffc9afc74e0) at src/mbc/mbcsv_api.c:426
#18 0x000055d83474a02a in lgs_mbcsv_dispatch (mbcsv_hdl=<optimized out>) at src/log/logd/lgs_mbcsv.cc:509
#19 0x000055d8347262cd in main (argc=<optimized out>, argv=<optimized out>) at src/log/logd/lgs_main.cc:592
Diff:
commit 47dee89190f1be5714010b5e546dc6a3168a74f0 (HEAD -> develop, origin/develop, ticket-3334)
Author: thien.m.huynh <thien.m.huynh@dektech.com.au>
Date: Thu Apr 6 18:07:26 2023 +0700