From: Anton L. <alo...@op...> - 2013-07-03 07:48:46
|
This patch makes it easier to troubleshoot why external commands fail by adding more detailed error codes as well as a cmd_error_strerror function for easy printing. It adds an extra log entry for each failed command, including malformed commands and commands that fail because of OOM errors, as opposed to only logging the fact that those external commands did not execute properly. This should also make it easier to add even better error reporting and error handling in the future, if necessary. Signed-off-by: Anton Lofgren <alo...@op...> --- base/commands.c | 42 ++++++++++++++++++++++++++++++------------ include/common.h | 8 ++++++++ 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/base/commands.c b/base/commands.c index 363fc06..c30e40d 100644 --- a/base/commands.c +++ b/base/commands.c @@ -132,7 +132,7 @@ int shutdown_command_file_worker(void) { static int command_input_handler(int sd, int events, void *discard) { - int ret; + int ret, cmd_ret; char *buf; unsigned long size; @@ -150,7 +150,10 @@ static int command_input_handler(int sd, int events, void *discard) { buf[size] = 0; log_debug_info(DEBUGL_COMMANDS, 1, "Read raw external command '%s'\n", buf); } - process_external_command1(buf); + if ((cmd_ret = process_external_command1(buf)) != CMD_ERROR_OK) { + logit(NSLOG_EXTERNAL_COMMAND | NSLOG_RUNTIME_WARNING, TRUE, "External command error: %s\n", cmd_error_strerror(cmd_ret)); + } + } return 0; } @@ -364,7 +367,7 @@ int process_external_command1(char *cmd) { log_debug_info(DEBUGL_FUNCTIONS, 0, "process_external_command1()\n"); if(cmd == NULL) - return ERROR; + return CMD_ERROR_MALFORMED_COMMAND; /* strip the command of newlines and carriage returns */ strip(cmd); @@ -373,16 +376,16 @@ int process_external_command1(char *cmd) { /* get the command entry time */ if((temp_ptr = my_strtok(cmd, "[")) == NULL) - return ERROR; + return CMD_ERROR_MALFORMED_COMMAND; if((temp_ptr = my_strtok(NULL, "]")) == NULL) - return ERROR; + return 
CMD_ERROR_MALFORMED_COMMAND; entry_time = (time_t)strtoul(temp_ptr, NULL, 10); /* get the command identifier */ if((temp_ptr = my_strtok(NULL, ";")) == NULL) - return ERROR; + return CMD_ERROR_MALFORMED_COMMAND; if((command_id = (char *)strdup(temp_ptr + 1)) == NULL) - return ERROR; + return CMD_ERROR_INTERNAL_ERROR; /* get the command arguments */ if((temp_ptr = my_strtok(NULL, "\n")) == NULL) @@ -391,7 +394,7 @@ int process_external_command1(char *cmd) { args = (char *)strdup(temp_ptr); if(args == NULL) { my_free(command_id); - return ERROR; + return CMD_ERROR_INTERNAL_ERROR; } /* decide what type of command this is... */ @@ -853,7 +856,7 @@ int process_external_command1(char *cmd) { my_free(command_id); my_free(args); - return ERROR; + return CMD_ERROR_UNKNOWN_COMMAND; } /* update statistics for external commands */ @@ -878,7 +881,8 @@ int process_external_command1(char *cmd) { #endif /* process the command */ - if ((external_command_ret = process_external_command2(command_type, entry_time, args)) != OK) { + external_command_ret = (process_external_command2(command_type, entry_time, args) == OK) ? 
CMD_ERROR_OK : CMD_ERROR_FAILURE; + if (external_command_ret != CMD_ERROR_OK) { logit(NSLOG_EXTERNAL_COMMAND | NSLOG_RUNTIME_WARNING, TRUE, "Error: External command failed -> %s;%s\n", command_id, args); } @@ -895,7 +899,21 @@ int process_external_command1(char *cmd) { return external_command_ret; } - +const char *cmd_error_strerror(int code) { + switch(code) { + case CMD_ERROR_OK: + return "No error"; + case CMD_ERROR_FAILURE: + return "Command failed"; + case CMD_ERROR_INTERNAL_ERROR: + return "Internal error"; + case CMD_ERROR_UNKNOWN_COMMAND: + return "Unknown or unsupported command"; + case CMD_ERROR_MALFORMED_COMMAND: + return "Malformed command"; + } + return "Unknown error"; + } /* top-level processor for a single external command */ int process_external_command2(int cmd, time_t entry_time, char *args) { @@ -1283,7 +1301,7 @@ int process_external_command2(int cmd, time_t entry_time, char *args) { break; default: - return ERROR; + return CMD_ERROR_UNKNOWN_COMMAND; break; } diff --git a/include/common.h b/include/common.h index 2c91bce..759bcdd 100644 --- a/include/common.h +++ b/include/common.h @@ -347,6 +347,14 @@ NAGIOS_END_DECL /* custom command introduced in Nagios 3.x */ #define CMD_CUSTOM_COMMAND 999 +/**************************** COMMAND ERRORS *****************************/ +#define CMD_ERROR_OK 0 /* No errors encountered */ +#define CMD_ERROR_UNKNOWN_COMMAND 1 /* Unknown/unsupported command */ +#define CMD_ERROR_MALFORMED_COMMAND 2 /* Command malformed/missing timestamp? */ +#define CMD_ERROR_INTERNAL_ERROR 3 /* Internal error */ +#define CMD_ERROR_FAILURE 4 /* Command routine failed */ + +extern const char *cmd_error_strerror(int error_code); /**************************** CHECK TYPES ********************************/ -- 1.8.3.2 |
From: Jens H. <he...@rz...> - 2013-07-03 08:12:19
Attachments:
smime.p7s
|
Hi, this is probably already known: Nagios crashed when a downtime was scheduled with a start time in the past. Here is what gdb told us: ----------------------------------------------- Core was generated by `/usr/local/nagios/bin/nagios /usr/local/nagios/etc/nagios.cfg'. Program terminated with signal 11, Segmentation fault. #0 0x00000000004722b7 in handle_scheduled_downtime () (gdb) where #0 0x00000000004722b7 in handle_scheduled_downtime () #1 0x000000000042ac65 in handle_timed_event () #2 0x000000000042b1eb in event_execution_loop () #3 0x0000000000412787 in main () (gdb) quit ----------------------------------------------- Best regards, Jens Hektor -- Dipl.-Phys. Jens Hektor, Netzbetrieb RWTH Aachen University, Center for Computing and Communication Room 2.04, Wendlingweg 10, 52074 Aachen (Germany) Phone: +49 241 80 29206 - Fax: +49 241 80 22100 http://www.rz.rwth-aachen.de - he...@rz... |