From: Zoran V. <vas...@us...> - 2005-10-08 16:26:34
|
Update of /cvsroot/naviserver/naviserver/nsd In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25813/nsd Modified Files: nsmain.c Log Message: Added an alarm of 300 seconds for the watchdog on Darwin [note: the diff below sets WAKEUP_IN_SECONDS to 600]. When the alarm triggers, the watchdog re-checks the existence of the server process (by sending signal 0 with kill()) and clears the nsconf.pid value if the process is gone. This is the signal to restart the server. All this mess is needed because, under some strange circumstances, WaitForServer() never returns although the server process is already gone. This smells of broken signal delivery to me... Index: nsmain.c =================================================================== RCS file: /cvsroot/naviserver/naviserver/nsd/nsmain.c,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** nsmain.c 8 Oct 2005 12:06:07 -0000 1.19 --- nsmain.c 8 Oct 2005 16:26:25 -0000 1.20 *************** *** 59,62 **** --- 59,68 ---- #define MAX_NUM_RESTARTS 256 /* Quit after somany unsuccessful restarts */ + #ifdef __APPLE__ + # define WAKEUP_IN_SECONDS 600 /* Wakeup watchdog after somuch seconds */ + #else + # define WAKEUP_IN_SECONDS 0 /* Wakeup watchdog after somuch seconds */ + #endif + /* * Local functions defined in this file. *************** *** 65,69 **** static int StartWatchedServer(void); static void SysLog(int priority, char *fmt, ...); ! static void WatchdogSigtermHandler(int sig); static int WaitForServer(); --- 71,76 ---- static int StartWatchedServer(void); static void SysLog(int priority, char *fmt, ...); ! static void WatchdogSIGTERMHandler(int sig); ! static void WatchdogSIGALRMHandler(int sig); static int WaitForServer(); *************** *** 1128,1132 **** *---------------------------------------------------------------------- * ! * WatchdogSigtermHandler -- * * Handle SIGTERM and pass to server process. --- 1135,1139 ---- *---------------------------------------------------------------------- * ! 
* WatchdogSIGTERMHandler -- * * Handle SIGTERM and pass to server process. *************** *** 1142,1150 **** static void ! WatchdogSigtermHandler(int sig) { kill((pid_t) nsconf.pid, sig); watchdogExit = 1; } /* --- 1149,1186 ---- static void ! WatchdogSIGTERMHandler(int sig) { kill((pid_t) nsconf.pid, sig); watchdogExit = 1; } + + + /* + *---------------------------------------------------------------------- + * + * WatchdogSIGALRMHandler -- + * + * Handle SIGALRM to check existence of the nsconf.pid server + * process. + * + * Results: + * None. + * + * Side effects: + * Zero-out the nsconf.pid element indicating absence of the + * server process. + * + *---------------------------------------------------------------------- + */ + + static void + WatchdogSIGALRMHandler(int sig) + { + if (kill((pid_t) nsconf.pid, 0)) { + SysLog(LOG_WARNING, "watchdog: server %d terminated?", nsconf.pid); + nsconf.pid = 0; + } + } + /* *************** *** 1168,1180 **** WaitForServer() { ! int ret, status; ! pid_t pid; ! char *msg; do { pid = waitpid(nsconf.pid, &status, 0); ! } while (pid == -1 && errno == EINTR); ! if (WIFEXITED(status)) { ret = WEXITSTATUS(status); msg = "exited"; --- 1204,1219 ---- WaitForServer() { ! int ret, status; ! pid_t pid; ! char *msg; do { pid = waitpid(nsconf.pid, &status, 0); ! } while (pid == -1 && errno == EINTR && nsconf.pid); ! if (nsconf.pid == 0) { ! msg = "terminated"; ! ret = -1; /* Alarm handler found no server present? */ ! } else if (WIFEXITED(status)) { ret = WEXITSTATUS(status); msg = "exited"; *************** *** 1213,1217 **** StartWatchedServer(void) { ! unsigned int setSigterm = 0, startTime, numRestarts = 0, restartWait = 0; SysLog(LOG_NOTICE, "watchdog: started."); --- 1252,1257 ---- StartWatchedServer(void) { ! unsigned int setSigHandlers=0, startTime, numRestarts=0, restartWait=0; ! 
struct itimerval timer; SysLog(LOG_NOTICE, "watchdog: started."); *************** *** 1240,1250 **** /* * Register SIGTERM handler so we can gracefully stop the server. ! * The watchdog will exit w/o stopping the server if got signalled ! * with any other signal, though. */ ! if (setSigterm == 0) { ! setSigterm = 1; ! ns_signal(SIGTERM, WatchdogSigtermHandler); } --- 1280,1301 ---- /* * Register SIGTERM handler so we can gracefully stop the server. ! * The watchdog passes the signal to the server, if possible. ! * ! * Register SIGALRM handler to wake up the watchdog to check if ! * the server is still present. This tries to solve issues with ! * signal delivery on some systems where waitpid() fails to report ! * process exitus (i.e. it is just stuck). */ ! if (setSigHandlers == 0) { ! setSigHandlers = 1; ! timer.it_interval.tv_sec = WAKEUP_IN_SECONDS; ! timer.it_value.tv_sec = timer.it_interval.tv_sec; ! timer.it_value.tv_usec = timer.it_interval.tv_usec = 0; ! if (timer.it_value.tv_sec || timer.it_value.tv_usec) { ! setitimer(ITIMER_REAL, &timer, NULL); ! ns_signal(SIGALRM, WatchdogSIGALRMHandler); ! } ! ns_signal(SIGTERM, WatchdogSIGTERMHandler); } |