From: Zoran V. <vas...@us...> - 2005-03-17 17:49:26
|
Update of /cvsroot/naviserver/naviserver/nsd In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8709 Modified Files: nsd.h nsmain.c unix.c Log Message: Added watchdog implementation. Index: unix.c =================================================================== RCS file: /cvsroot/naviserver/naviserver/nsd/unix.c,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** unix.c 26 Feb 2005 13:21:22 -0000 1.2 --- unix.c 17 Mar 2005 17:49:12 -0000 1.3 *************** *** 181,185 **** */ ! void NsHandleSignals(void) { --- 181,185 ---- */ ! int NsHandleSignals(void) { *************** *** 214,217 **** --- 214,219 ---- ns_sigmask(SIG_UNBLOCK, &set, NULL); + + return sig; } Index: nsd.h =================================================================== RCS file: /cvsroot/naviserver/naviserver/nsd/nsd.h,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** nsd.h 24 Feb 2005 17:24:52 -0000 1.2 --- nsd.h 17 Mar 2005 17:49:12 -0000 1.3 *************** *** 3,7 **** * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at ! * http://www.mozilla.org/. * * Software distributed under the License is distributed on an "AS IS" --- 3,7 ---- * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at ! * http://mozilla.org/. * * Software distributed under the License is distributed on an "AS IS" *************** *** 97,105 **** #ifdef _WIN32 ! #define NS_SIGTERM 1 ! #define NS_SIGHUP 2 #else - #define NS_SIGTERM SIGTERM #define NS_SIGHUP SIGHUP #endif --- 97,107 ---- #ifdef _WIN32 ! #define NS_SIGHUP 1 ! #define NS_SIGINT 2 ! #define NS_SIGTERM 15 #else #define NS_SIGHUP SIGHUP + #define NS_SIGINT SIGINT + #define NS_SIGTERM SIGTERM #endif *************** *** 858,862 **** extern void NsStartServers(void); extern void NsBlockSignals(int debug); ! extern void NsHandleSignals(void); extern void NsStopDrivers(void); extern void NsPreBind(char *bindargs, char *bindfile); --- 860,864 ---- extern void NsStartServers(void); extern void NsBlockSignals(int debug); ! extern int NsHandleSignals(void); extern void NsStopDrivers(void); extern void NsPreBind(char *bindargs, char *bindfile); Index: nsmain.c =================================================================== RCS file: /cvsroot/naviserver/naviserver/nsd/nsmain.c,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** nsmain.c 26 Feb 2005 13:22:17 -0000 1.2 --- nsmain.c 17 Mar 2005 17:49:12 -0000 1.3 *************** *** 1,7 **** /* ! * The contents of this file are subject to the AOLserver Public License * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at ! * http://aolserver.com/. * * Software distributed under the License is distributed on an "AS IS" --- 1,7 ---- /* ! * The contents of this file are subject to the Mozilla Public License * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at ! * http://mozilla.org/. * * Software distributed under the License is distributed on an "AS IS" *************** *** 31,35 **** * nsmain.c -- * ! * AOLserver Ns_Main() startup routine. */ --- 31,35 ---- * nsmain.c -- * ! * NaviServer Ns_Main() startup routine. */ *************** *** 47,50 **** --- 47,55 ---- */ + static int StartWatchedServer(void); + static void SysLog(int priority, char *fmt, ...); + static void WatchdogSigtermHandler(int sig); + static int WaitForServer(); + static void UsageError(char *msg); static void StatusMsg(int state); *************** *** 56,59 **** --- 61,85 ---- #endif + /* + * Setup timer/counter values for graceously waiting before trying + * to restart crippled server. This should be configurable from the + * server config file (ns/watchdog section or alike). + */ + + #define MAX_RESTART_SECONDS 64 /* Max time in sec to wait between restarts */ + #define MIN_WORK_SECONDS 128 /* After being up for # secs, reset timers */ + #define MAX_NUM_RESTARTS 256 /* Quit after somany unsuccessful restarts */ + + #ifndef _WIN32 + # ifdef LOG_DEBUG + # undef LOG_DEBUG /* Because this is used by the syslog facility as well */ + # endif + # include <syslog.h> + # include <signal.h> + # include <stdarg.h> + # include <unistd.h> + static int watchdogExit = 0; /* Watchdog loop toggle */ + #endif /* _WIN32 */ + /* *************** *** 62,66 **** * Ns_Main -- * ! * The AOLserver startup routine called from main(). Startup is * somewhat complicated to ensure certain things happen in the * correct order. --- 88,92 ---- * Ns_Main -- * ! * The NaviServer startup routine called from main(). Startup is * somewhat complicated to ensure certain things happen in the * correct order. *************** *** 78,82 **** Ns_Main(int argc, char **argv, Ns_ServerInitProc *initProc) { ! int i, fd; char *config; Ns_Time timeout; --- 104,108 ---- Ns_Main(int argc, char **argv, Ns_ServerInitProc *initProc) { ! int i, fd, sig; char *config; Ns_Time timeout; *************** *** 146,150 **** /* ! * AOLserver requires file descriptor 0 be open on /dev/null to * ensure the server never blocks reading stdin. */ --- 172,176 ---- /* ! * NaviServer requires file descriptor 0 be open on /dev/null to * ensure the server never blocks reading stdin. */ *************** *** 178,182 **** opterr = 0; ! while ((i = getopt(argc, argv, "hpzifVs:t:IRSkKdr:u:g:b:B:")) != -1) { switch (i) { case 'h': --- 204,208 ---- opterr = 0; ! while ((i = getopt(argc, argv, "hpzifwVs:t:IRSkKdr:u:g:b:B:")) != -1) { switch (i) { case 'h': *************** *** 185,188 **** --- 211,215 ---- case 'f': case 'i': + case 'w': case 'V': #ifdef _WIN32 *************** *** 195,199 **** UsageError("only one of -i, -f, -V, -I, -R, or -S may be specified"); #else ! UsageError("only one of -i, -f, or -V may be specified"); #endif } --- 222,226 ---- UsageError("only one of -i, -f, -V, -I, -R, or -S may be specified"); #else ! UsageError("only one of -i, -f, -w, or -V may be specified"); #endif } *************** *** 248,252 **** } if (mode == 'V') { ! printf("AOLserver/%s (%s)\n", NSD_VERSION, Ns_InfoLabel()); printf(" CVS Tag: %s\n", Ns_InfoTag()); printf(" Built: %s\n", Ns_InfoBuildDate()); --- 275,279 ---- } if (mode == 'V') { ! printf("NaviServer/%s (%s)\n", NSD_VERSION, Ns_InfoLabel()); printf(" CVS Tag: %s\n", Ns_InfoTag()); printf(" Built: %s\n", Ns_InfoBuildDate()); *************** *** 312,315 **** --- 339,368 ---- } } + + /* + * If running as privileged user (root) check given user/group + * information and bail-out if any of them not really known. + */ + + if (getuid() == 0) { + + /* + * OK, so the caller is running as root. In such cases + * he/she should have used "-u" to give the actual user + * to run as (may be root as well) and optionally "-g" + * to set the process group. We're picky about the group + * though. If we were not able to figure out to which + * group the user belongs to, we will abort, no mercy. + */ + + if (uid == -1) { + Ns_Fatal("nsmain: will not run without valid user; " + "must specify '-u username' parameter"); + } + if (gid == -1) { + Ns_Fatal("nsmain: will not run for unknown group; " + "must specify '-g group' parameter"); + } + } /* *************** *** 338,341 **** --- 391,428 ---- /* + * Fork into the background and create a new session. + */ + + if (mode == 0 || mode == 'w') { + i = ns_fork(); + if (i == -1) { + Ns_Fatal("nsmain: fork() failed: '%s'", strerror(errno)); + } + if (i > 0) { + return 0; + } + setsid(); /* Detaches from the controlling terminal device */ + } + + /* + * Optionally, start the watchdog/server process pair. + * The watchdog process will monitor and restart the server unless + * the server exits gracefully, either by calling exit(0) or get + * signalled by the SIGTERM signal. + * The watchdog process itself will exit when the server process + * exits gracefully, or, when get signalled by the SIGTERM signal. + * In the latter case, watchdog will pass the SIGTERM to the server + * process, so both of them will gracefully terminate. + */ + + if (mode == 'w') { + if (StartWatchedServer() == 0) { + return 0; + } + } else { + nsconf.pid = getpid(); + } + + /* * Pre-bind any sockets now, before a possible setuid from root * or chroot which may hide /etc/resolv.conf required to *************** *** 358,386 **** nsconf.home = "/"; } ! /* ! * If caller is running as the privileged user, determine and change ! * to the run time (given) user and/or group. */ if (getuid() == 0) { - - /* - * OK, so the caller is running as root. In such cases - * he/she should have used "-u" to give the actual user - * to run as (may be root as well) and optionally "-g" - * to set the process group. We're picky about the group - * though. If we were not able to figure out to which - * group the user belongs to, we will abort, no mercy. - */ - - if (uid == -1) { - Ns_Fatal("nsmain: will not run without valid user; " - "must specify '-u username' parameter"); - } - if (gid == -1) { - Ns_Fatal("nsmain: will not run for unknown group; " - "must specify '-g group' parameter"); - } /* --- 445,455 ---- nsconf.home = "/"; } ! /* ! * If caller is running as the privileged user, change ! * to the run time (given) user and/or group now. */ if (getuid() == 0) { /* *************** *** 421,442 **** } #endif ! ! /* ! * Fork into the background and create a new session if running ! * in daemon mode. ! */ ! ! if (mode == 0) { ! i = ns_fork(); ! if (i < 0) { ! Ns_Fatal("nsmain: fork() failed: '%s'", strerror(errno)); ! } ! if (i > 0) { ! return 0; ! } ! nsconf.pid = getpid(); ! setsid(); ! } ! /* * Finally, block all signals for the duration of startup to ensure any --- 490,494 ---- } #endif ! /* * Finally, block all signals for the duration of startup to ensure any *************** *** 654,658 **** */ ! NsHandleSignals(); /* --- 706,710 ---- */ ! sig = NsHandleSignals(); /* *************** *** 710,714 **** NsRemovePidFile(procname); StatusMsg(3); ! return 0; } --- 762,772 ---- NsRemovePidFile(procname); StatusMsg(3); ! ! /* ! * The server exits gracefully on NS_SIGTERM. ! * All other signals are propagated to the caller. ! */ ! ! return (sig == NS_SIGTERM) ? 0 : sig; } *************** *** 780,790 **** * NsTclShutdownObjCmd -- * ! * Implements ns_shutdown as obj command. * * Results: ! * Tcl result. * * Side effects: ! * See docs. * *---------------------------------------------------------------------- --- 838,850 ---- * NsTclShutdownObjCmd -- * ! * Shutdown the server, waiting at most timeout seconds for threads ! * to exit cleanly before giving up. * * Results: ! * Tcl result. * * Side effects: ! * If -restart was specified and watchdog is active, server ! * will be restarted. * *---------------------------------------------------------------------- *************** *** 794,813 **** NsTclShutdownObjCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj **objv) { ! int timeout; ! if (objc != 1 && objc != 2) { ! Tcl_WrongNumArgs(interp, 1, objv, "?timeout?"); return TCL_ERROR; } ! if (objc == 1) { timeout = nsconf.shutdowntimeout; - } else if (Tcl_GetIntFromObj(interp, objv[1], &timeout) != TCL_OK) { - return TCL_ERROR; } - Tcl_SetIntObj(Tcl_GetObjResult(interp), timeout); - Ns_MutexLock(&nsconf.state.lock); - nsconf.shutdowntimeout = timeout; Ns_MutexUnlock(&nsconf.state.lock); ! NsSendSignal(NS_SIGTERM); return TCL_OK; } --- 854,884 ---- NsTclShutdownObjCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj **objv) { ! int timeout = 0, signal = NS_SIGTERM; ! Ns_ObjvSpec opts[] = { ! {"-restart", Ns_ObjvBool, &signal, (void *) NS_SIGINT}, ! {"--", Ns_ObjvBreak, NULL, NULL}, ! {NULL, NULL, NULL, NULL} ! }; ! Ns_ObjvSpec args[] = { ! {"?timeout", Ns_ObjvInt, &timeout, NULL}, ! {NULL, NULL, NULL, NULL} ! }; ! ! if (Ns_ParseObjv(opts, args, interp, 1, objc, objv) != NS_OK) { return TCL_ERROR; } ! ! Ns_MutexLock(&nsconf.state.lock); ! if (timeout > 0) { ! nsconf.shutdowntimeout = timeout; ! } else { timeout = nsconf.shutdowntimeout; } Ns_MutexUnlock(&nsconf.state.lock); ! ! NsSendSignal(signal); ! Tcl_SetIntObj(Tcl_GetObjResult(interp), timeout); ! return TCL_OK; } *************** *** 899,902 **** --- 970,974 ---- " -i inittab mode\n" " -f foreground mode\n" + " -w watchdog mode: restart a failed server\n" #ifdef _WIN32 " -I Install win32 service\n" *************** *** 950,951 **** --- 1022,1205 ---- return config; } + + /* + *---------------------------------------------------------------------- + * + * SysLog -- + * + * Logs a message to the system log facility + * + * Results: + * None. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + + static void + SysLog(int priority, char *fmt, ...) + { + va_list ap; + + openlog("nsd", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON); + va_start(ap, fmt); + vsyslog(priority, fmt, ap); + va_end(ap); + closelog(); + } + + /* + *---------------------------------------------------------------------- + * + * WatchdogSigtermHandler -- + * + * Handle SIGTERM and pass to server process. + * + * Results: + * None. + * + * Side effects: + * Watchdog will not restart the server. + * + *---------------------------------------------------------------------- + */ + + static void + WatchdogSigtermHandler(int sig) + { + kill((pid_t) nsconf.pid, sig); + watchdogExit = 1; + } + + /* + *---------------------------------------------------------------------- + * + * WaitForServer -- + * + * Waits for the server process to exit or die due to an uncaught + * signal. + * + * Results: + * NS_OK if the server exited cleanly, NS_ERROR otherwise. + * + * Side effects: + * May wait forever... + * + *---------------------------------------------------------------------- + */ + + static int + WaitForServer() + { + int ret, status; + pid_t pid; + char *msg; + + do { + pid = waitpid(nsconf.pid, &status, 0); + } while (pid == -1 && errno == EINTR); + + if (WIFEXITED(status)) { + ret = WEXITSTATUS(status); + msg = "exited"; + } else if (WIFSIGNALED(status)) { + ret = WTERMSIG(status); + msg = "terminated"; + } else { + msg = "killed"; + ret = -1; /* Some waitpid (or other unknown) failure? */ + } + + SysLog(LOG_NOTICE, "watchdog: server %d %s (%d).", nsconf.pid, msg, ret); + + return ret ? NS_ERROR : NS_OK; + } + + /* + *---------------------------------------------------------------------- + * + * StartWatchedServer -- + * + * Restart the server process until it exits 0 or we exceed the + * maximum number of restart attempts. + * + * Results: + * None. + * + * Side effects: + * Install SIGTERM handler for watchdog process. + * Sets the global nsconf.pid with the process ID of the server. + * + *---------------------------------------------------------------------- + */ + + static int + StartWatchedServer(void) + { + unsigned int setSigterm = 0, startTime, numRestarts = 0, restartWait = 0; + + SysLog(LOG_NOTICE, "watchdog: started."); + + do { + if (restartWait) { + SysLog(LOG_WARNING, + "watchdog: waiting %d seconds before restart %d.", + restartWait, numRestarts); + sleep(restartWait); + } + nsconf.pid = ns_fork(); + if (nsconf.pid == -1) { + SysLog(LOG_ERR, "watchdog: fork() failed: '%s'.", strerror(errno)); + Ns_Fatal("watchdog: fork() failed: '%s'.", strerror(errno)); + } + if (nsconf.pid == 0) { + /* Server process. */ + nsconf.pid = getpid(); + SysLog(LOG_NOTICE, "server: started."); + return nsconf.pid; + } + + /* Watchdog process */ + + /* + * Register SIGTERM handler so we can gracefully stop the server. + * The watchdog will exit w/o stopping the server if got signalled + * with any other signal, though. + */ + + if (setSigterm == 0) { + setSigterm = 1; + ns_signal(SIGTERM, WatchdogSigtermHandler); + } + + startTime = time(NULL); + + if (WaitForServer() == NS_OK) { + break; + } + + if ((time(NULL) - startTime) > MIN_WORK_SECONDS) { + restartWait = numRestarts = 0; + } + + if (++numRestarts > MAX_NUM_RESTARTS) { + SysLog(LOG_WARNING, "watchdog: exceeded restart limit of %d", + MAX_NUM_RESTARTS); + break; + } + + restartWait *= 2; + if (restartWait > MAX_RESTART_SECONDS) { + restartWait = MAX_RESTART_SECONDS; + } else if (restartWait == 0) { + restartWait = 1; + } + + } while (!watchdogExit); + + SysLog(LOG_NOTICE, "watchdog: exited."); + + return 0; + } |