1. Summary
  2. Files
  3. Support
  4. Report Spam
  5. Create account
  6. Log in

Changeset 11348

Show
Ignore:
Timestamp:
06/05/13 03:35:18 (11 months ago)
Author:
svn-sync
Message:

Synchronized with https://dev.naver.com/svn/cubrid/trunk
Source Revision: 8538, Author: xkyu, Msg:
[CUBRIDSUS-11113][CR]supplement cub_master error log for HA node events, fail-over, and fail-back events

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • cubrid/trunk/src/executables/master_heartbeat.c

    r10487 r11348  
    5858#include "utility.h" 
    5959 
     60#define HB_INFO_STR_MAX         8192 
    6061 
    6162/* list */ 
     
    177178static int hb_reload_config (void); 
    178179 
    179 /* debug and test */ 
    180 static void hb_print_nodes (void); 
    181 static void hb_print_jobs (HB_JOB * jobs); 
    182 static void hb_print_procs (void); 
    183  
     180static int hb_help_sprint_processes_info (char *buffer, int max_length); 
     181static int hb_help_sprint_nodes_info (char *buffer, int max_length); 
     182static int hb_help_sprint_jobs_info (HB_JOB * jobs, char *buffer, 
     183                                     int max_length); 
    184184 
    185185HB_CLUSTER *hb_Cluster = NULL; 
     
    611611  HB_CLUSTER_JOB_ARG *clst_arg; 
    612612  HB_NODE_ENTRY *node; 
     613  char hb_info_str[HB_INFO_STR_MAX]; 
    613614 
    614615  rv = pthread_mutex_lock (&hb_Cluster->lock); 
     
    665666      hb_cluster_request_heartbeat_to_all (); 
    666667 
    667       hb_print_nodes (); 
    668  
    669668      pthread_mutex_unlock (&hb_Cluster->lock); 
    670669 
     
    677676              "More than one master detected and local " 
    678677              "processes and cub_master will be terminated"); 
     678 
     679      hb_help_sprint_nodes_info (hb_info_str, HB_INFO_STR_MAX); 
     680      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_HB_NODE_EVENT, 1, 
     681              hb_info_str); 
    679682 
    680683      /* TODO : hb_terminate() */ 
     
    691694      hb_cluster_request_heartbeat_to_all (); 
    692695 
    693       /* hb_print_nodes (); */ 
    694  
    695696      pthread_mutex_unlock (&hb_Cluster->lock); 
    696697 
     
    726727    } 
    727728 
    728   hb_print_nodes (); 
     729  hb_help_sprint_nodes_info (hb_info_str, HB_INFO_STR_MAX); 
     730  er_log_debug (ARG_FILE_LINE, "%s", hb_info_str); 
    729731 
    730732calc_end: 
     
    872874 
    873875ping_check_cancel: 
    874   if (hb_Cluster->state == HB_NSTATE_MASTER) 
    875     { 
    876       er_set (ER_NOTIFICATION_SEVERITY, ARG_FILE_LINE, ER_HB_NODE_EVENT, 1, 
    877               "Failback cancelled by ping check"); 
    878     } 
    879   else 
     876/* if this node is a master, then failback is cancelled */ 
     877 
     878  if (hb_Cluster->state != HB_NSTATE_MASTER) 
    880879    { 
    881880      er_set (ER_NOTIFICATION_SEVERITY, ARG_FILE_LINE, ER_HB_NODE_EVENT, 1, 
     
    918917  int error, rv; 
    919918  int num_master; 
     919  char hb_info_str[HB_INFO_STR_MAX]; 
    920920 
    921921  rv = pthread_mutex_lock (&hb_Cluster->lock); 
     
    938938    } 
    939939 
     940  hb_help_sprint_nodes_info (hb_info_str, HB_INFO_STR_MAX); 
     941  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_HB_NODE_EVENT, 1, hb_info_str); 
     942 
    940943  hb_cluster_request_heartbeat_to_all (); 
    941944  pthread_mutex_unlock (&hb_Cluster->lock); 
     
    964967  int error, rv; 
    965968  int num_master; 
     969  char hb_info_str[HB_INFO_STR_MAX]; 
    966970 
    967971  rv = pthread_mutex_lock (&hb_Cluster->lock); 
     
    973977  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_HB_NODE_EVENT, 1, 
    974978          "Master will be slave"); 
     979 
     980  hb_help_sprint_nodes_info (hb_info_str, HB_INFO_STR_MAX); 
     981  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_HB_NODE_EVENT, 1, hb_info_str); 
    975982 
    976983  pthread_mutex_unlock (&hb_Resource->lock); 
     
    20002007  HB_PROC_ENTRY *proc; 
    20012008  HB_RESOURCE_JOB_ARG *proc_arg = (arg) ? &(arg->resource_job_arg) : NULL; 
     2009  char hb_info_str[HB_INFO_STR_MAX]; 
    20022010 
    20032011  if (arg == NULL || proc_arg == NULL) 
     
    20222030      pthread_mutex_unlock (&hb_Resource->lock); 
    20232031 
    2024       snprintf (error_string, LINE_MAX, "(exceed max retry count, args:%s)", 
     2032      snprintf (error_string, LINE_MAX, 
     2033                "(exceed max retry count for pid: %d, args:%s)", proc->pid, 
    20252034                proc->args); 
    20262035 
     
    20582067      if (errno == ESRCH) 
    20592068        { 
    2060           snprintf (error_string, LINE_MAX, "(process not found, args:%s)", 
    2061                     proc->args); 
    2062           er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, 
    2063                   ER_HB_PROCESS_EVENT, 2, "Failed to restart process", 
    2064                   error_string); 
     2069          snprintf (error_string, LINE_MAX, 
     2070                    "(process not found, expected pid: %d, args:%s)", 
     2071                    proc->pid, proc->args); 
     2072          er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_HB_PROCESS_EVENT, 2, 
     2073                  "Failed to restart process", error_string); 
    20652074 
    20662075          error = hb_resource_job_queue (HB_RJOB_PROC_START, arg, 
     
    21022111  pthread_mutex_unlock (&hb_Resource->lock); 
    21032112 
    2104   hb_print_procs (); 
     2113  hb_help_sprint_processes_info (hb_info_str, HB_INFO_STR_MAX); 
     2114  er_log_debug (ARG_FILE_LINE, "%s", hb_info_str); 
    21052115 
    21062116  if (retry) 
     
    22232233  int i, error, rv; 
    22242234  HB_PROC_ENTRY *proc; 
     2235  char hb_info_str[HB_INFO_STR_MAX]; 
    22252236 
    22262237#if !defined(WINDOWS) 
     
    22582269  if (hb_Resource->procs) 
    22592270    { 
    2260       hb_print_procs (); 
     2271      hb_help_sprint_processes_info (hb_info_str, HB_INFO_STR_MAX); 
     2272      er_log_debug (ARG_FILE_LINE, "%s", hb_info_str); 
    22612273    } 
    22622274 
     
    26802692       && proc->pid == (int) ntohl (hbp_proc_register->pid) 
    26812693       && !(kill (proc->pid, 0) && errno == ESRCH))) 
    2682  
    26832694    { 
    26842695      proc->state = proc_state; 
     
    27162727#endif 
    27172728 
    2718       snprintf (error_string, LINE_MAX, "%s (pid:%d, state:%d, args:%s)", 
     2729      snprintf (error_string, LINE_MAX, "%s (pid:%d, state:%s, args:%s)", 
    27192730                HB_RESULT_SUCCESS_STR, ntohl (hbp_proc_register->pid), 
    2720                 proc->state, hbp_proc_register->args); 
     2731                hb_process_state_string (proc->type, proc->state), 
     2732                hbp_proc_register->args); 
    27212733      er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_HB_PROCESS_EVENT, 2, 
    27222734              "Registered as local process entries", error_string); 
     
    27282740  pthread_mutex_unlock (&hb_Resource->lock); 
    27292741 
    2730   snprintf (error_string, LINE_MAX, "%s (pid:%d, state:%d, args:%s)", 
    2731             HB_RESULT_FAILURE_STR, ntohl (hbp_proc_register->pid), 
    2732             proc->state, hbp_proc_register->args); 
     2742  snprintf (error_string, LINE_MAX, 
     2743            "%s (expected pid: %d, pid:%d, state:%s, args:%s)", 
     2744            HB_RESULT_FAILURE_STR, proc->pid, ntohl (hbp_proc_register->pid), 
     2745            hb_process_state_string (proc->type, proc->state), hbp_proc_register->args); 
    27332746  er_set (ER_ERROR_SEVERITY, ARG_FILE_LINE, ER_HB_PROCESS_EVENT, 2, 
    27342747          "Registered as local process entries", error_string); 
     
    45984611} 
    45994612 
    4600  
    4601 /*  
    4602  * debug and test 
    4603  */ 
    4604  
    4605 /* 
    4606  * hb_print_nodes - 
    4607  *   return: none 
    4608  * 
    4609  */ 
    4610 static void 
    4611 hb_print_nodes (void) 
    4612 { 
    4613   int i; 
     4613static int 
     4614hb_help_sprint_nodes_info (char *buffer, int max_length) 
     4615{ 
    46144616  HB_NODE_ENTRY *node; 
    4615   char buffer[8192] = { 0, }, *p, *last; 
    4616  
    4617   if (NULL == hb_Cluster) 
    4618     { 
    4619       er_log_debug (ARG_FILE_LINE, "hb_Cluster is null. \n"); 
    4620       return; 
    4621     } 
    4622  
    4623   p = (char *) &buffer[0]; 
    4624   last = p + sizeof (buffer); 
    4625  
    4626   p += snprintf (p, MAX ((last - p), 0), "\n * print cluster * \n"); 
     4617  char *p, *last; 
     4618 
     4619  if (*buffer != '\0') 
     4620    { 
     4621      memset (buffer, 0, max_length); 
     4622    } 
     4623 
     4624  p = buffer; 
     4625  last = buffer + max_length; 
     4626 
     4627  p += snprintf (p, MAX ((last - p), 0), "HA Node Info\n"); 
    46274628  p += snprintf (p, MAX ((last - p), 0), "==============================" 
    46284629                 "==================================================\n"); 
    46294630  p += 
    46304631    snprintf (p, MAX ((last - p), 0), 
    4631               " * group_id : %s   host_name : %s   state : %u \n", 
    4632               hb_Cluster->group_id, hb_Cluster->host_name, hb_Cluster->state); 
     4632              " * group_id : %s   host_name : %s   state : %s \n", 
     4633              hb_Cluster->group_id, hb_Cluster->host_name, 
     4634              hb_node_state_string (hb_Cluster->state)); 
    46334635  p += 
    46344636    snprintf (p, MAX ((last - p), 0), 
     
    46364638              "--------------------------------------------------\n"); 
    46374639  p += 
    4638     snprintf (p, MAX ((last - p), 0), "%-20s %-10s %-10s %-10s %-10s\n", 
    4639               "name", "priority", "state", "score", "hb_gap"); 
     4640    snprintf (p, MAX ((last - p), 0), "%-20s %-10s %-15s %-10s %-20s\n", 
     4641              "name", "priority", "state", "score", "missed heartbeat"); 
    46404642  p += 
    46414643    snprintf (p, MAX ((last - p), 0), 
     
    46464648    { 
    46474649      p += 
    4648         snprintf (p, MAX ((last - p), 0), "%-20s %-10u %-10u %-10d %-10d\n", 
    4649                   node->host_name, node->priority, node->state, node->score, 
     4650        snprintf (p, MAX ((last - p), 0), "%-20s %-10u %-15s %-10d %-20d\n", 
     4651                  node->host_name, node->priority, 
     4652                  hb_node_state_string (node->state), node->score, 
    46504653                  node->heartbeat_gap); 
    46514654    } 
     
    46554658  p += snprintf (p, MAX ((last - p), 0), "\n"); 
    46564659 
    4657   er_log_debug (ARG_FILE_LINE, "%s", buffer); 
    4658 } 
    4659  
    4660 /* 
    4661  * hb_print_jobs - 
    4662  *   return: none 
    4663  * 
    4664  */ 
    4665 static void 
    4666 hb_print_jobs (HB_JOB * jobs) 
    4667 { 
    4668   int rv; 
    4669   HB_JOB_ENTRY *job; 
    4670   char buffer[8192] = { 0, }, *p, *last; 
    4671  
    4672   p = (char *) &buffer[0]; 
    4673   last = p + sizeof (buffer); 
    4674  
    4675   p += snprintf (p, MAX ((last - p), 0), "\n * print jobs * \n"); 
    4676   p += snprintf (p, MAX ((last - p), 0), "==============================" 
    4677                  "==================================================\n"); 
    4678   p += snprintf (p, MAX ((last - p), 0), "%-10s %-20s %-20s %-20s\n", "type", 
    4679                  "func", "arg", "expire"); 
    4680   p += snprintf (p, MAX ((last - p), 0), "------------------------------" 
    4681                  "--------------------------------------------------\n"); 
    4682  
    4683   rv = pthread_mutex_lock (&jobs->lock); 
    4684   for (job = jobs->jobs; job; job = job->next) 
    4685     { 
    4686  
    4687       p += snprintf (p, MAX ((last - p), 0), 
    4688                      "%-10d %-20p %-20p %-10d.%-10d\n", job->type, 
    4689                      (void *) job->func, (void *) job->arg, 
    4690                      (unsigned int) job->expire.tv_sec, 
    4691                      (unsigned int) job->expire.tv_usec); 
    4692     } 
    4693  
    4694   pthread_mutex_unlock (&jobs->lock); 
    4695  
    4696   p += snprintf (p, MAX ((last - p), 0), "==============================" 
    4697                  "==================================================\n"); 
    4698   p += snprintf (p, MAX ((last - p), 0), "\n"); 
    4699  
    4700   er_log_debug (ARG_FILE_LINE, "%s", buffer); 
    4701 } 
    4702  
    4703 /* 
    4704  * hb_print_procs - 
    4705  *   return: none 
    4706  * 
    4707  */ 
    4708 void 
    4709 hb_print_procs (void) 
    4710 { 
    4711   int i, rv; 
     4660  return p - buffer; 
     4661} 
     4662 
     4663static int 
     4664hb_help_sprint_processes_info (char *buffer, int max_length) 
     4665{ 
    47124666  HB_PROC_ENTRY *proc; 
    4713   char buffer[8192] = { 0, }, *p, *last; 
    4714  
    4715   if (NULL == hb_Resource) 
    4716     { 
    4717       er_log_debug (ARG_FILE_LINE, "hb_Resource is null. \n"); 
    4718       return; 
    4719     } 
    4720  
    4721   p = (char *) &buffer[0]; 
    4722   last = p + sizeof (buffer); 
    4723  
    4724   p += snprintf (p, MAX ((last - p), 0), "\n * print process * \n"); 
     4667  char *p, *last; 
     4668 
     4669  if (*buffer != '\0') 
     4670    { 
     4671      memset (buffer, 0, max_length); 
     4672    } 
     4673 
     4674  p = buffer; 
     4675  last = p + max_length; 
     4676 
     4677  p += snprintf (p, MAX ((last - p), 0), "HA Process Info\n"); 
    47254678 
    47264679  p += snprintf (p, MAX ((last - p), 0), "==============================" 
    47274680                 "==================================================\n"); 
    47284681  p += 
    4729     snprintf (p, MAX ((last - p), 0), " * state : %u \n", hb_Resource->state); 
     4682    snprintf (p, MAX ((last - p), 0), " * state : %s \n", 
     4683              hb_node_state_string (hb_Cluster->state)); 
    47304684 
    47314685  p += snprintf (p, MAX ((last - p), 0), "------------------------------" 
    47324686                 "--------------------------------------------------\n"); 
    47334687  p += 
    4734     snprintf (p, MAX ((last - p), 0), "%-10s %-5s %-5s %-10s\n", "pid", 
    4735               "state", "type", "sfd"); 
     4688    snprintf (p, MAX ((last - p), 0), "%-10s %-22s %-15s %-10s\n", "pid", 
     4689              "state", "type", "socket fd"); 
    47364690  p += 
    47374691    snprintf (p, MAX ((last - p), 0), "     %-30s %-35s\n", "exec-path", 
     
    47484702 
    47494703      p += 
    4750         snprintf (p, MAX ((last - p), 0), "%-10d %-5u %-5u %-10d\n", 
    4751                   proc->pid, proc->state, proc->type, proc->sfd); 
     4704        snprintf (p, MAX ((last - p), 0), "%-10d %-22s %-15s %-10d\n", 
     4705                  proc->pid, hb_process_state_string (proc->type, proc->state), 
     4706                  hb_process_type_string (proc->type), proc->sfd); 
    47524707      p += 
    47534708        snprintf (p, MAX ((last - p), 0), "      %-30s %-35s\n", 
     
    47594714  p += snprintf (p, MAX ((last - p), 0), "\n"); 
    47604715 
    4761   er_log_debug (ARG_FILE_LINE, "%s", buffer); 
    4762 } 
     4716  return p - buffer; 
     4717} 
     4718 
     4719static int 
     4720hb_help_sprint_jobs_info (HB_JOB * jobs, char *buffer, int max_length) 
     4721{ 
     4722  int rv; 
     4723  HB_JOB_ENTRY *job; 
     4724  char *p, *last; 
     4725 
     4726  p = (char *) &buffer[0]; 
     4727  last = p + sizeof (buffer); 
     4728 
     4729  p += snprintf (p, MAX ((last - p), 0), "HA Job Info\n"); 
     4730  p += snprintf (p, MAX ((last - p), 0), "==============================" 
     4731                 "==================================================\n"); 
     4732  p += snprintf (p, MAX ((last - p), 0), "%-10s %-20s %-20s %-20s\n", "type", 
     4733                 "func", "arg", "expire"); 
     4734  p += snprintf (p, MAX ((last - p), 0), "------------------------------" 
     4735                 "--------------------------------------------------\n"); 
     4736 
     4737  rv = pthread_mutex_lock (&jobs->lock); 
     4738  for (job = jobs->jobs; job; job = job->next) 
     4739    { 
     4740 
     4741      p += snprintf (p, MAX ((last - p), 0), 
     4742                     "%-10d %-20p %-20p %-10d.%-10d\n", job->type, 
     4743                     (void *) job->func, (void *) job->arg, 
     4744                     (unsigned int) job->expire.tv_sec, 
     4745                     (unsigned int) job->expire.tv_usec); 
     4746    } 
     4747 
     4748  pthread_mutex_unlock (&jobs->lock); 
     4749 
     4750  p += snprintf (p, MAX ((last - p), 0), "==============================" 
     4751                 "==================================================\n"); 
     4752  p += snprintf (p, MAX ((last - p), 0), "\n"); 
     4753 
     4754  return p - buffer; 
     4755}