I'm using sfcb-1.3.12-53.ppcnf.
SFCB seems to be stuck and fails to respond to any incoming CIM requests.
This issue has been seen a couple of times but is not consistently reproducible.
This issue is a show stopper for us and requires appropriate attention from the SFCB team.
When it got stuck, we ran killall -11 sfcbd to send signal 11 (SIGSEGV) to all sfcbd processes and force them to dump core.
Here are the back traces for relevant SFCB processes:
$ ps ax | grep sf
21981 ? Ssl 2:47 /usr/sbin/sfcbd -d
21982 ? S 0:00 /usr/sbin/sfcbd -d
21984 ? S 0:01 /usr/sbin/sfcbd -d
21985 ? S 60:59 /usr/sbin/sfcbd -d
21991 ? Sl 0:00 /usr/sbin/sfcbd -d
21992 ? Sl 2:19 /usr/sbin/sfcbd -d
21996 ? S 2:36 /usr/sbin/sfcbd -d
6310 ? S 0:00 /usr/sbin/sfcbd -d
6446 ? S 0:00 /usr/sbin/sfcbd -d
7079 ? S 0:00 /usr/sbin/sfcbd -d
10540 ? S 0:00 /usr/sbin/sfcbd -d
11206 ? S 0:00 /usr/sbin/sfcbd -d
11328 ? S 0:00 /usr/sbin/sfcbd -d
11466 ? S 0:00 /usr/sbin/sfcbd -d
12121 ? S 0:00 /usr/sbin/sfcbd -d
26824 ? Sl 0:00 /usr/sbin/sfcbd -d
26858 ? Sl 0:00 /usr/sbin/sfcbd -d
26869 ? Sl 0:00 /usr/sbin/sfcbd -d
22040 ? Sl 3461:38 /usr/sbin/sfcbd -d
Process 22040 detached
$ strace ~p 26869
$ strace -p 26869
Process 26869 attached - interrupt to quit
recvmsg(23, <unfinished ...>
Process 26869 detached
$ strace -p 21992
Process 21992 attached - interrupt to quit
recvmsg(13, <unfinished ...>
Process 21992 detached
$ strace -p 21991
Process 21991 attached - interrupt to quit
recvmsg(11, <unfinished ...>
Process 21991 detached
$ killall -11 sfcbd
$ ps ax | grep sf
21981 ? Ssl 2:47 /usr/sbin/sfcbd -d
21985 ? S 61:01 /usr/sbin/sfcbd -d
21996 ? S 2:36 /usr/sbin/sfcbd -d
22040 ? Sl 3471:05 /usr/sbin/sfcbd -d
21992 ? Sl 2:19 /usr/sbin/sfcbd -d
21991 ? S 0:00 /usr/sbin/sfcbd -d
28428 ? Sl 0:00 /usr/sbin/sfcbd -d
28462 ? Sl 0:00 /usr/sbin/sfcbd -d
28471 ? Sl 0:00 /usr/sbin/sfcbd -d
29018 ? Ss 0:00 /bin/fsh /tmp/sfcbd.0141700.fsh
29020 ? Ss 0:00 /bin/fsh /tmp/sfcbd.0776657.fsh
29022 ? Ss 0:00 /bin/fsh /tmp/sfcbd.0812832.fsh
29024 ? Ss 0:00 /bin/fsh /tmp/sfcbd.0836741.fsh
29027 ? S 0:00 sh -c sttr sigterm sfcbd
29028 ? Rl 0:00 sttr sigterm sfcbd
29029 ? S 0:00 /bin/fsh -x /etc/fspinit/halt 11328 sfcbd
29039 ? S 0:00 sh -c sttr sigterm sfcbd
29040 ? R 0:00 sttr sigterm sfcbd
29046 pts/0 R+ 0:00 grep sf
Program terminated with signal 11, Segmentation fault.
#0 0x0fcdcc3c in semop (semid=<value optimized out>, sops=<value optimized out>, nsops=<value optimized out>) at ../sysdeps/unix/sysv/linux/semop.c:36
36 ../sysdeps/unix/sysv/linux/semop.c: No such file or directory.
in ../sysdeps/unix/sysv/linux/semop.c
(gdb) bt
**NOTE: debug frames are hidden in bt display**
#0 0x0fcdcc3c in semop (semid=<value optimized out>, sops=<value optimized out>, nsops=<value optimized out>) at ../sysdeps/unix/sysv/linux/semop.c:36
#1 0x0ff2f1a4 in semAcquire (semid=720913, semnum=0) at msgqueue.c:76
#2 0x0ffdaaf4 in handleHttpRequest (connFd=70, sslMode=1) at httpAdapter.c:1077
#3 0x0ffdb8d8 in acceptRequest (sock=69, ssin=0x7fc035e8, sin_len=28, sslMode=1) at httpAdapter.c:1383
#4 0x0ffdc840 in httpDaemon (argc=2, argv=0x7fc039d4, sslMode=1, sfcbPid=265981960) at httpAdapter.c:1736
#5 0x10003214 in startHttpd (argc=2, argv=0x7fc039d4, sslMode=1) at sfcBroker.c:399
#6 0x10003f8c in main (argc=2, argv=0x7fc039d4) at sfcBroker.c:753
(gdb)
Program terminated with signal 11, Segmentation fault.
#0 0x0fd94edc in recvmsg () from /opt/mcp/ppcnf/crossroot/lib/libpthread.so.0
(gdb) bt
**NOTE: debug frames are hidden in bt display**
#0 0x0fd94edc in recvmsg () from /opt/mcp/ppcnf/crossroot/lib/libpthread.so.0
#1 0x0ff2fad4 in spGetMsg (s=0x7fc02b78, from=0x7fc029a0, data=0x7fc02988, length=20, mqg=0x7fc0299c) at msgqueue.c:214
#2 0x0ff2fe14 in spRcvMsg (s=0x7fc02b78, from=0x7fc02a40, data=0x7fc02a3c, length=0x7fc02a38, mqg=0x7fc0299c) at msgqueue.c:275
#3 0x0ff306bc in spRecvResult (s=0x7fc02b78, from=0x7fc02a40, data=0x7fc02a3c, length=0x7fc02a38) at msgqueue.c:378
#4 0x0ff381cc in intInvokeProvider (ctx=0x7fc02bd0, sockets={receive = 72, send = 73}) at providerMgr.c:1246
#5 0x0ff38584 in invokeProvider (ctx=0x7fc02bd0) at providerMgr.c:1295
#6 0x0ffa3f58 in createInstance (ctx=0x7fc03250, hdr=0x7fc02dbc) at cimXmlRequest.c:1260
#7 0x0ffab6a8 in sendHdrToHandler (hdr=0x7fc02dbc, ctx=0x7fc03250) at cimXmlRequest.c:2660
#8 0x0ffaba80 in handleCimXmlRequest (ctx=0x7fc03250, flags=0) at cimXmlRequest.c:2732
#9 0x0ffda714 in doHttpRequest (conn_fd={socket = 70, file = 0x1002f5d8, buf = 0x10030c50, bio = 0x1002d560, ssl = 0x1002daa8}) at httpAdapter.c:1026
#10 0x0ffdb2b0 in handleHttpRequest (connFd=70, sslMode=1) at httpAdapter.c:1202
#11 0x0ffdb8d8 in acceptRequest (sock=69, ssin=0x7fc035e8, sin_len=28, sslMode=1) at httpAdapter.c:1383
#12 0x0ffdc840 in httpDaemon (argc=2, argv=0x7fc039d4, sslMode=1, sfcbPid=265981960) at httpAdapter.c:1736
#13 0x10003214 in startHttpd (argc=2, argv=0x7fc039d4, sslMode=1) at sfcBroker.c:399
#14 0x10003f8c in main (argc=2, argv=0x7fc039d4) at sfcBroker.c:753
(gdb)
Program terminated with signal 11, Segmentation fault.
#0 0x0fccb2b4 in __read_nocancel () at ../stdlib/stdlib.h:342
342 ../stdlib/stdlib.h: No such file or directory.
in ../stdlib/stdlib.h
(gdb) bt
**NOTE: debug frames are hidden in bt display**
#0 0x0fccb2b4 in __read_nocancel () at ../stdlib/stdlib.h:342
#1 0x0fc7aba4 in _IO_new_file_underflow (fp=0x10016008) at fileops.c:593
#2 0x0fc7b014 in _IO_default_uflow (fp=0x234) at genops.c:435
#3 0x0fc7cf18 in *__GI___uflow (fp=0x10016008) at genops.c:389
#4 0x0fc6d18c in _IO_getline_info (fp=0x234, buf=0x7fc02644 "*** httpAdapter.c:1175 Error accepting SSL connection -- exiting\n", n=4095, delim=805441536, extract_delim=1,
eof=0x0)
at iogetline.c:74
#6 0x0fc6d080 in _IO_getline (fp=0x234,
buf=0x30021000 "\003-#- ClassProvider - 21985 provider exiting due to a SIGSEGV signal\n\003-#- InternalProvider - 21996 provider exiting due to a SIGSEGV signal\n\003-#-
FipS_IndProvider - 22040 provider exiting due to a SIGSE"..., n=4096, delim=10, extract_delim=1) at iogetline.c:42
#7 0x0fc6ba4c in _IO_fgets (buf=0x7fc02644 "*** httpAdapter.c:1175 Error accepting SSL connection -- exiting\n", n=4096, fp=0x10016008) at iofgets.c:58
#8 0x0ff5e5a0 in runLogger (listenFd=4, level=3) at mlog.c:54
#9 0x0ff5e6dc in startLogging (level=3) at mlog.c:92
#10 0x10003930 in main (argc=2, argv=0x7fc039d4) at sfcBroker.c:604
Program terminated with signal 6, Aborted.
#0 0x0fc3c5a0 in *__GI_raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
64 ../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.
in ../nptl/sysdeps/unix/sysv/linux/raise.c
(gdb) bt
**NOTE: debug frames are hidden in bt display**
#0 0x0fc3c5a0 in *__GI_raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64
#1 0x0fc3dcec in *__GI_abort () at abort.c:88
#2 0x0ff3629c in _methProvider (ctx=0x7ff3459c, req=0x7ff345e8) at providerMgr.c:844
#3 0x0ff39b44 in startUpProvider (ns=0xff62f20 "", name=0xff6326c "") at providerMgr.c:1575
#4 0x0ff366dc in processProviderMgrRequests () at providerMgr.c:934
#5 0x10003fb8 in main (argc=2, argv=0x7ff34a14) at sfcBroker.c:77
(gdb) bt
**NOTE: debug frames are hidden in bt display**
#0 0x0ff1b7c8 in __flush_mt (mt=0x10021688) at support.c:386
#1 0x0ff1c990 in tool_mm_flush () at support.c:620
#2 0x0ff4cd4c in processProviderInvocationRequestsThread (prms=0x100214a8) at providerDrv.c:2819
#3 0x0ff4d41c in processProviderInvocationRequests (name=0x100173a8 "ProfileProvider") at providerDrv.c:2915
#4 0x0ff3d220 in getProcess (info=0x100172e8, proc=0x7ff34450) at providerDrv.c:761
#5 0x0ff3d768 in forkProvider (info=0x100172e8, req=0x7ff345e8, msg=0x0) at providerDrv.c:811
#6 0x0ff364ac in _methProvider (ctx=0x7ff3459c, req=0x7ff345e8) at providerMgr.c:866
#7 0x0ff39b44 in startUpProvider (ns=0xff62f20 "", name=0xff63374 "") at providerMgr.c:1575
#8 0x0ff36714 in processProviderMgrRequests () at providerMgr.c:943
#9 0x10003fb8 in main (argc=2, argv=0x7ff34a14) at sfcBroker.c:77
One more thing: restarting the SFCBD process seems to recover the CIM functionality.
Were these cores dumped before SFCB was in its hung state or did you force them to be dumped?
We forced a dump when SFCB was in this state.
Can you provide any details or steps to reproduce this issue? Also what version of FSP is used?
3498496-79580-fix.patch
patch applied to cvs head
committed to git Apr 4